diff options
Diffstat (limited to 'cddl/contrib/opensolaris/lib')
129 files changed, 78706 insertions, 0 deletions
diff --git a/cddl/contrib/opensolaris/lib/libctf/common/ctf.5 b/cddl/contrib/opensolaris/lib/libctf/common/ctf.5 new file mode 100644 index 0000000..316e978 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libctf/common/ctf.5 @@ -0,0 +1,1140 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright (c) 2014 Joyent, Inc. +.\" +.Dd Sep 26, 2014 +.Dt CTF 5 +.Os +.Sh NAME +.Nm ctf +.Nd Compact C Type Format +.Sh SYNOPSIS +.In sys/ctf.h +.Sh DESCRIPTION +.Nm +is designed to be a compact representation of the C programming +language's type information focused on serving the needs of dynamic +tracing, debuggers, and other in-situ and post-mortem introspection +tools. +.Nm +data is generally included in +.Sy ELF +objects and is tagged as +.Sy SHT_PROGBITS +to ensure that the data is accessible in a running process and in subsequent +core dumps, if generated. +.Lp +The +.Nm +data contained in each file has information about the layout and +sizes of C types, including intrinsic types, enumerations, structures, +typedefs, and unions, that are used by the corresponding +.Sy ELF +object. The +.Nm +data may also include information about the types of global objects and +the return type and arguments of functions in the symbol table. +.Lp +Because a +.Nm +file is often embedded inside a file, rather than being a standalone +file itself, it may also be referred to as a +.Nm +.Sy container . +.Lp +On illumos systems, +.Nm +data is consumed by multiple programs. It can be used by the modular +debugger, +.Xr mdb 1 , +as well as by +.Xr dtrace 1M . 
+Programmatic access to +.Nm +data can be obtained through +.Xr libctf 3LIB . +.Lp +The +.Nm +file format is broken down into seven different sections. The first +section is the +.Sy preamble +and +.Sy header , +which describes the version of the +.Nm +file, links it has to other +.Nm +files, and the sizes of the other sections. The next section is the +.Sy label +section, +which provides a way of identifying similar groups of +.Nm +data across multiple files. This is followed by the +.Sy object +information section, which describes the type of global +symbols. The subsequent section is the +.Sy function +information section, which describes the return +types and arguments of functions. The next section is the +.Sy type +information section, which describes +the format and layout of the C types themselves, and finally the last +section is the +.Sy string +section, which contains the names of types, enumerations, members, and +labels. +.Lp +While strictly speaking, only the +.Sy preamble +and +.Sy header +are required, to be actually useful, both the type and string +sections are necessary. +.Lp +A +.Nm +file may contain all of the type information that it requires, or it +may optionally refer to another +.Nm +file which holds the remaining types. When a +.Nm +file refers to another file, it is called the +.Sy child +and the file it refers to is called the +.Sy parent . +A given file may only refer to one parent. This process is called +.Em uniquification +because it ensures each child only has type information that is +unique to it. A common example of this is that most kernel modules in +illumos are uniquified against the kernel module +.Sy genunix +and the type information that comes from the +.Sy IP +module. This means that a module only has types that are unique to +itself and the most common types in the kernel are not duplicated. +.Sh FILE FORMAT +This documents version +.Em two +of the +.Nm +file format. 
All applications and tools currently produce and operate on +this version. +.Lp +The file format can be summarized with the following image, the +following sections will cover this in more detail. +.Bd -literal + + +-------------+ 0t0 ++--------| Preamble | +| +-------------+ 0t4 +|+-------| Header | +|| +-------------+ 0t36 + cth_lbloff +||+------| Labels | +||| +-------------+ 0t36 + cth_objtoff +|||+-----| Objects | +|||| +-------------+ 0t36 + cth_funcoff +||||+----| Functions | +||||| +-------------+ 0t36 + cth_typeoff +|||||+---| Types | +|||||| +-------------+ 0t36 + cth_stroff +||||||+--| Strings | +||||||| +-------------+ 0t36 + cth_stroff + cth_strlen +||||||| +||||||| +||||||| +||||||| +-- magic - vers flags +||||||| | | | | +||||||| +------+------+------+------+ ++---------| 0xcf | 0xf1 | 0x02 | 0x00 | + |||||| +------+------+------+------+ + |||||| 0 1 2 3 4 + |||||| + |||||| + parent label + objects + |||||| | + parent name | + functions + strings + |||||| | | + label | | + types | + strlen + |||||| | | | | | | | | + |||||| +------+------+------+------+------+-------+-------+-------+ + +--------| 0x00 | 0x00 | 0x00 | 0x08 | 0x36 | 0x110 | 0x5f4 | 0x611 | + ||||| +------+------+------+------+------+-------+-------+-------+ + ||||| 0x04 0x08 0x0c 0x10 0x14 0x18 0x1c 0x20 0x24 + ||||| + ||||| + Label name + ||||| | + Label type + ||||| | | + Next label + ||||| | | | + ||||| +-------+------+-----+ + +-----------| 0x01 | 0x42 | ... | + |||| +-------+------+-----+ + |||| cth_lbloff +0x4 +0x8 cth_objtoff + |||| + |||| + |||| Symidx 0t15 0t43 0t44 + |||| +------+------+------+-----+ + +----------| 0x00 | 0x42 | 0x36 | ... | + ||| +------+------+------+-----+ + ||| cth_objtoff +0x2 +0x4 +0x6 cth_funcoff + ||| + ||| + CTF_TYPE_INFO + CTF_TYPE_INFO + ||| | + Return type | + ||| | | + arg0 | + ||| +--------+------+------+-----+ + +---------| 0x2c10 | 0x08 | 0x0c | ... 
| + || +--------+------+------+-----+ + || cth_funcff +0x2 +0x4 +0x6 cth_typeoff + || + || + ctf_stype_t for type 1 + || | integer + integer encoding + || | | + ctf_stype_t for type 2 + || | | | + || +--------------------+-----------+-----+ + +--------| 0x19 * 0xc01 * 0x0 | 0x1000000 | ... | + | +--------------------+-----------+-----+ + | cth_typeoff +0x08 +0x0c cth_stroff + | + | +--- str 0 + | | +--- str 1 + str 2 + | | | | + | v v v + | +----+---+---+---+----+---+---+---+---+---+----+ + +---| \\0 | i | n | t | \\0 | f | o | o | _ | t | \\0 | + +----+---+---+---+----+---+---+---+---+---+----+ + 0 1 2 3 4 5 6 7 8 9 10 11 +.Ed +.Lp +Every +.Nm +file begins with a +.Sy preamble , +followed by a +.Sy header . +The +.Sy preamble +is defined as follows: +.Bd -literal +typedef struct ctf_preamble { + ushort_t ctp_magic; /* magic number (CTF_MAGIC) */ + uchar_t ctp_version; /* data format version number (CTF_VERSION) */ + uchar_t ctp_flags; /* flags (see below) */ +} ctf_preamble_t; +.Ed +.Pp +The +.Sy preamble +is four bytes long and must be four byte aligned. +This +.Sy preamble +defines the version of the +.Nm +file which defines the format of the rest of the header. While the +header may change in subsequent versions, the preamble will not change +across versions, though the interpretation of its flags may change from +version to version. The +.Em ctp_magic +member defines the magic number for the +.Nm +file format. This must always be +.Li 0xcff1 . +If another value is encountered, then the file should not be treated as +a +.Nm +file. The +.Em ctp_version +member defines the version of the +.Nm +file. The current version is +.Li 2 . +It is possible to encounter an unsupported version. In that case, +software should not try to parse the format, as it may have changed. +Finally, the +.Em ctp_flags +member describes aspects of the file which modify its interpretation. 
+The following flags are currently defined: +.Bd -literal +#define CTF_F_COMPRESS 0x01 +.Ed +.Pp +The flag +.Sy CTF_F_COMPRESS +indicates that the body of the +.Nm +file, all the data following the +.Sy header , +has been compressed through the +.Sy zlib +library and its +.Sy deflate +algorithm. If this flag is not present, then the body has not been +compressed and no special action is needed to interpret it. All offsets +into the data as described by +.Sy header , +always refer to the +.Sy uncompressed +data. +.Lp +In version two of the +.Nm +file format, the +.Sy header +denotes whether or not this +.Nm +file is the child of another +.Nm +file and also indicates the size of the remaining sections. The +structure for the +.Sy header , +logically contains a copy of the +.Sy preamble +and the two have a combined size of 36 bytes. +.Bd -literal +typedef struct ctf_header { + ctf_preamble_t cth_preamble; + uint_t cth_parlabel; /* ref to name of parent lbl uniq'd against */ + uint_t cth_parname; /* ref to basename of parent */ + uint_t cth_lbloff; /* offset of label section */ + uint_t cth_objtoff; /* offset of object section */ + uint_t cth_funcoff; /* offset of function section */ + uint_t cth_typeoff; /* offset of type section */ + uint_t cth_stroff; /* offset of string section */ + uint_t cth_strlen; /* length of string section in bytes */ +} ctf_header_t; +.Ed +.Pp +After the +.Sy preamble , +the next two members +.Em cth_parlabel +and +.Em cth_parname , +are used to identify the parent. The value of both members are offsets +into the +.Sy string +section which point to the start of a null-terminated string. For more +information on the encoding of strings, see the subsection on +.Sx String Identifiers . +If the value of either is zero, then there is no entry for that +member. If the member +.Em cth_parlabel +is set, then the +.Em cth_parname +member must be set, otherwise it will not be possible to find the +parent. 
If +.Em cth_parname +is set, it is not necessary to define +.Em cth_parlabel , +as the parent may not have a label. For more information on labels +and their interpretation, see +.Sx The Label Section . +.Lp +The remaining members (excepting +.Em cth_strlen ) +describe the beginning of the corresponding sections. These offsets are +relative to the end of the +.Sy header . +Therefore, something with an offset of 0 is at an offset of thirty-six +bytes relative to the start of the +.Nm +file. The difference between members +indicates the size of the section itself. Different offsets have +different alignment requirements. The start of the +.Em cth_objtoff +and +.Em cth_funcoff +must be two byte aligned, while the sections +.Em cth_lbloff +and +.Em cth_typeoff +must be four-byte aligned. The section +.Em cth_stroff +has no alignment requirements. To calculate the size of a given section, +excepting the +.Sy string +section, one should subtract the offset of the section from the offset of the following one. For +example, the size of the +.Sy types +section can be calculated by subtracting +.Em cth_typeoff +from +.Em cth_stroff . +.Lp +Finally, the member +.Em cth_strlen +describes the length of the string section itself. From it, you can also +calculate the size of the entire +.Nm +file by adding together the size of the +.Sy ctf_header_t , +the offset of the string section in +.Em cth_stroff , +and the size of the string section in +.Em cth_strlen . +.Ss Type Identifiers +Through the +.Nm ctf +data, types are referred to by identifiers. A given +.Nm +file supports up to 32767 (0x7fff) types. The first valid type identifier is 0x1. +When a given +.Nm +file is a child, indicated by a non-zero entry for the +.Sy header Ns 's +.Em cth_parname , +then the first valid type identifier is 0x8000 and the last is 0xffff. +In this case, type identifiers 0x1 through 0x7fff are references to the +parent. 
+.Lp +The type identifier zero is a sentinel value used to indicate that there +is no type information available or it is an unknown type. +.Lp +Throughout the file format, the identifier is stored in different sized +values; however, the minimum size to represent a given identifier is a +.Sy uint16_t . +Other consumers of +.Nm +information may use larger or opaque identifiers. +.Ss String Identifiers +String identifiers are always encoded as four byte unsigned integers +which are an offset into a string table. The +.Nm +format supports two different string tables which have an identifier of +zero or one. This identifier is stored in the high-order bit of the +unsigned four byte offset. Therefore, the maximum supported offset into +one of these tables is 0x7fffffff. +.Lp +Table identifier zero always refers to the +.Sy string +section in the CTF file itself. String table identifier one refers to an +external string table which is the ELF string table for the ELF symbol +table associated with the +.Nm +container. +.Ss Type Encoding +Every +.Nm +type begins with metadata encoded into a +.Sy uint16_t . +This encoded information tells us three different pieces of information: +.Bl -bullet -offset indent -compact +.It +The kind of the type +.It +Whether this type is a root type or not +.It +The length of the variable data +.El +.Lp +The 16 bits that make up the encoding are broken down such that you have +five bits for the kind, one bit for indicating whether or not it is a +root type, and 10 bits for the variable length. This is laid out as +follows: +.Bd -literal -offset indent ++--------------------+ +| kind | root | vlen | ++--------------------+ +15 11 10 9 0 +.Ed +.Lp +The current version of the file format defines 14 different kinds. The +interpretation of these different kinds will be discussed in the section +.Sx The Type Section . +If a kind is encountered that is not listed below, then it is not a valid +.Nm +file. 
The kinds are defined as follows: +.Bd -literal -offset indent +#define CTF_K_UNKNOWN 0 +#define CTF_K_INTEGER 1 +#define CTF_K_FLOAT 2 +#define CTF_K_POINTER 3 +#define CTF_K_ARRAY 4 +#define CTF_K_FUNCTION 5 +#define CTF_K_STRUCT 6 +#define CTF_K_UNION 7 +#define CTF_K_ENUM 8 +#define CTF_K_FORWARD 9 +#define CTF_K_TYPEDEF 10 +#define CTF_K_VOLATILE 11 +#define CTF_K_CONST 12 +#define CTF_K_RESTRICT 13 +.Ed +.Lp +Programs directly reference many types; however, other types are referenced +indirectly because they are part of some other structure. These types that are +referenced directly and used are called +.Sy root +types. Other types may be used indirectly, for example, a program may reference +a structure directly, but not one of its members which has a type. That type is +not considered a +.Sy root +type. If a type is a +.Sy root +type, then it will have bit 10 set. +.Lp +The variable length section is specific to each kind and is discussed in the +section +.Sx The Type Section . +.Lp +The following macros are useful for constructing and deconstructing the encoded +type information: +.Bd -literal -offset indent + +#define CTF_MAX_VLEN 0x3ff +#define CTF_INFO_KIND(info) (((info) & 0xf800) >> 11) +#define CTF_INFO_ISROOT(info) (((info) & 0x0400) >> 10) +#define CTF_INFO_VLEN(info) (((info) & CTF_MAX_VLEN)) + +#define CTF_TYPE_INFO(kind, isroot, vlen) \\ + (((kind) << 11) | (((isroot) ? 1 : 0) << 10) | ((vlen) & CTF_MAX_VLEN)) +.Ed +.Ss The Label Section +When consuming +.Nm +data, it is often useful to know whether two different +.Nm +containers come from the same source base and version. For example, when +building illumos, there are many kernel modules that are built against a +single collection of source code. A label is encoded into the +.Nm +files that corresponds with the particular build. 
This ensures that if +files on the system were to become mixed up from multiple releases, that +they are not used together by tools, particularly when a child needs to +refer to a type in the parent. Because they are linked using the type +identifiers, if the wrong parent is used then the wrong type will be +encountered. +.Lp +Each label is encoded in the file format using the following eight byte +structure: +.Bd -literal +typedef struct ctf_lblent { + uint_t ctl_label; /* ref to name of label */ + uint_t ctl_typeidx; /* last type associated with this label */ +} ctf_lblent_t; +.Ed +.Lp +Each label has two different components, a name and a type identifier. +The name is encoded in the +.Em ctl_label +member which is in the format defined in the section +.Sx String Identifiers . +Generally, the names of all labels are found in the internal string +section. +.Lp +The type identifier encoded in the member +.Em ctl_typeidx +refers to the last type identifier that a label refers to in the current +file. Labels only refer to types in the current file, if the +.Nm +file is a child, then it will have the same label as its parent; +however, its label will only refer to its types, not its parent's. +.Lp +It is also possible, though rather uncommon, for a +.Nm +file to have multiple labels. Labels are placed one after another, every +eight bytes. When multiple labels are present, types may only belong to +a single label. +.Ss The Object Section +The object section provides a mapping from ELF symbols of type +.Sy STT_OBJECT +in the symbol table to a type identifier. Every entry in this section is +a +.Sy uint16_t +which contains a type identifier as described in the section +.Sx Type Identifiers . +If there is no information for an object, then the type identifier 0x0 +is stored for that entry. +.Lp +To walk the object section, you need to have a corresponding +.Sy symbol table +in the ELF object that contains the +.Nm +data. Not every object is included in this section. 
Specifically, when +walking the symbol table, an entry is skipped if it matches any of the +following conditions: +.Lp +.Bl -bullet -offset indent -compact +.It +The symbol type is not +.Sy STT_OBJECT +.It +The symbol's section index is +.Sy SHN_UNDEF +.It +The symbol's name offset is zero +.It +The symbol's section index is +.Sy SHN_ABS +and the value of the symbol is zero. +.It +The symbol's name is +.Li _START_ +or +.Li _END_ . +These are skipped because they are used for scoping local symbols in +ELF. +.El +.Lp +The following sample code shows an example of iterating the object +section and skipping the correct symbols: +.Bd -literal +#include <gelf.h> +#include <stdio.h> + +/* + * Given the start of the object section in the CTF file, the number of symbols, + * and the ELF Data sections for the symbol table and the string table, this + * prints the type identifiers that correspond to objects. Note, a more robust + * implementation should ensure that they don't walk beyond the end of the CTF + * object section. + */ +static int +walk_symbols(uint16_t *objtoff, Elf_Data *symdata, Elf_Data *strdata, + long nsyms) +{ + long i; + uintptr_t strbase = strdata->d_buf; + + for (i = 1; i < nsyms; i++, objtoff++) { + const char *name; + GElf_Sym sym; + + if (gelf_getsym(symdata, i, &sym) == NULL) + return (1); + + if (GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + continue; + if (sym.st_shndx == SHN_UNDEF || sym.st_name == 0) + continue; + if (sym.st_shndx == SHN_ABS && sym.st_value == 0) + continue; + name = (const char *)(strbase + sym.st_name); + if (strcmp(name, "_START_") == 0 || strcmp(name, "_END_") == 0) + continue; + + (void) printf("Symbol %d has type %d\n", i, *objtoff); + } + + return (0); +} +.Ed +.Ss The Function Section +The function section of the +.Nm +file encodes the types of both the function's arguments and the function's +return type. 
Similar to +.Sx The Object Section , +the function section encodes information for all symbols of type +.Sy STT_FUNCTION , +excepting those that fit specific criteria. Unlike with objects, because +functions have a variable number of arguments, they start with a type encoding +as defined in +.Sx Type Encoding , +which is the size of a +.Sy uint16_t . +For functions which have no type information available, they are encoded as +.Li CTF_TYPE_INFO(CTF_K_UNKNOWN, 0, 0) . +Functions with arguments are encoded differently. Here, the variable length is +turned into the number of arguments in the function. If a function is a +.Sy varargs +type function, then the number of arguments is increased by one. Functions with +type information are encoded as: +.Li CTF_TYPE_INFO(CTF_K_FUNCTION, 0, nargs) . +.Lp +For functions that have no type information, nothing else is encoded, and the +next function is encoded. For functions with type information, the next +.Sy uint16_t +is encoded with the type identifier of the return type of the function. It is +followed by each of the type identifiers of the arguments, if any exist, in the +order that they appear in the function. Therefore, argument 0 is the first type +identifier and so on. When a function has a final varargs argument, that is +encoded with the type identifier of zero. +.Lp +Like +.Sx The Object Section , +the function section is encoded in the order of the symbol table. It has +similar, but slightly different considerations from objects. While iterating the +symbol table, if any of the following conditions are true, then the entry is +skipped and no corresponding entry is written: +.Lp +.Bl -bullet -offset indent -compact +.It +The symbol type is not +.Sy STT_FUNCTION +.It +The symbol's section index is +.Sy SHN_UNDEF +.It +The symbol's name offset is zero +.It +The symbol's name is +.Li _START_ +or +.Li _END_ . +These are skipped because they are used for scoping local symbols in +ELF. 
+.El +.Ss The Type Section +The type section is the heart of the +.Nm +data. It encodes all of the information about the types themselves. The base of +the type information comes in two forms, a short form and a long form, each of +which may be followed by a variable number of arguments. The following +definitions describe the short and long forms: +.Bd -literal +#define CTF_MAX_SIZE 0xfffe /* max size of a type in bytes */ +#define CTF_LSIZE_SENT 0xffff /* sentinel for ctt_size */ +#define CTF_MAX_LSIZE UINT64_MAX + +typedef struct ctf_stype { + uint_t ctt_name; /* reference to name in string table */ + ushort_t ctt_info; /* encoded kind, variant length */ + union { + ushort_t _size; /* size of entire type in bytes */ + ushort_t _type; /* reference to another type */ + } _u; +} ctf_stype_t; + +typedef struct ctf_type { + uint_t ctt_name; /* reference to name in string table */ + ushort_t ctt_info; /* encoded kind, variant length */ + union { + ushort_t _size; /* always CTF_LSIZE_SENT */ + ushort_t _type; /* do not use */ + } _u; + uint_t ctt_lsizehi; /* high 32 bits of type size in bytes */ + uint_t ctt_lsizelo; /* low 32 bits of type size in bytes */ +} ctf_type_t; + +#define ctt_size _u._size /* for fundamental types that have a size */ +#define ctt_type _u._type /* for types that reference another type */ +.Ed +.Pp +Type sizes are stored in +.Sy bytes . +The basic small form uses a +.Sy ushort_t +to store the number of bytes. If the number of bytes in a structure would exceed +0xfffe, then the alternate form, the +.Sy ctf_type_t , +is used instead. To indicate that the larger form is being used, the member +.Em ctt_size +is set to value of +.Sy CTF_LSIZE_SENT +(0xffff). In general, when going through the type section, consumers use the +.Sy ctf_type_t +structure, but pay attention to the value of the member +.Em ctt_size +to determine whether they should increment their scan by the size of the +.Sy ctf_stype_t +or +.Sy ctf_type_t . 
+Not all kinds of types use +.Sy ctt_size . +Those which do not, will always use the +.Sy ctf_stype_t +structure. The individual sections for each kind have more information. +.Lp +Types are written out in order. Therefore the first entry encountered has a type +id of 0x1, or 0x8000 if a child. The member +.Em ctt_name +is encoded as described in the section +.Sx String Identifiers . +The string that it points to is the name of the type. If the identifier points +to an empty string (one that consists solely of a null terminator) then the type +does not have a name; this is common with anonymous structures and unions that +only have a typedef to name them, as well as, pointers and qualifiers. +.Lp +The next member, the +.Em ctt_info , +is encoded as described in the section +.Sx Type Encoding . +The type's kind tells us how to interpret the remaining data in the +.Sy ctf_type_t +and any variable length data that may exist. The rest of this section will be +broken down into the interpretation of the various kinds. +.Ss Encoding of Integers +Integers, which are of type +.Sy CTF_K_INTEGER , +have no variable length arguments. Instead, they are followed by a four byte +.Sy uint_t +which describes their encoding. All integers must be encoded with a variable +length of zero. The +.Em ctt_size +member describes the length of the integer in bytes. In general, integer sizes +will be rounded up to the closest power of two. 
+.Lp +The integer encoding contains three different pieces of information: +.Bl -bullet -offset indent -compact +.It +The encoding of the integer +.It +The offset in +.Sy bits +of the type +.It +The size in +.Sy bits +of the type +.El +.Pp +This encoding can be expressed through the following macros: +.Bd -literal -offset indent +#define CTF_INT_ENCODING(data) (((data) & 0xff000000) >> 24) +#define CTF_INT_OFFSET(data) (((data) & 0x00ff0000) >> 16) +#define CTF_INT_BITS(data) (((data) & 0x0000ffff)) + +#define CTF_INT_DATA(encoding, offset, bits) \\ + (((encoding) << 24) | ((offset) << 16) | (bits)) +.Ed +.Pp +The following flags are defined for the encoding at this time: +.Bd -literal -offset indent +#define CTF_INT_SIGNED 0x01 +#define CTF_INT_CHAR 0x02 +#define CTF_INT_BOOL 0x04 +#define CTF_INT_VARARGS 0x08 +.Ed +.Lp +By default, an integer is considered to be unsigned, unless it has the +.Sy CTF_INT_SIGNED +flag set. If the flag +.Sy CTF_INT_CHAR +is set, that indicates that the integer is of a type that stores character +data, for example the intrinsic C type +.Sy char +would have the +.Sy CTF_INT_CHAR +flag set. If the flag +.Sy CTF_INT_BOOL +is set, that indicates that the integer represents a boolean type. For example, +the intrinsic C type +.Sy _Bool +would have the +.Sy CTF_INT_BOOL +flag set. Finally, the flag +.Sy CTF_INT_VARARGS +indicates that the integer is used as part of a variable number of arguments. +This encoding is rather uncommon. +.Ss Encoding of Floats +Floats, which are of type +.Sy CTF_K_FLOAT , +are similar to their integer counterparts. They have no variable length +arguments and are followed by a four byte encoding which describes the kind of +float that exists. The +.Em ctt_size +member is the size, in bytes, of the float. 
The float encoding has three +different pieces of information inside of it: +.Lp +.Bl -bullet -offset indent -compact +.It +The specific kind of float that exists +.It +The offset in +.Sy bits +of the float +.It +The size in +.Sy bits +of the float +.El +.Lp +This encoding can be expressed through the following macros: +.Bd -literal -offset indent +#define CTF_FP_ENCODING(data) (((data) & 0xff000000) >> 24) +#define CTF_FP_OFFSET(data) (((data) & 0x00ff0000) >> 16) +#define CTF_FP_BITS(data) (((data) & 0x0000ffff)) + +#define CTF_FP_DATA(encoding, offset, bits) \\ + (((encoding) << 24) | ((offset) << 16) | (bits)) +.Ed +.Lp +Where as the encoding for integers was a series of flags, the encoding for +floats maps to a specific kind of float. It is not a flag-based value. The kinds of floats +correspond to both their size, and the encoding. This covers all of the basic C +intrinsic floating point types. The following are the different kinds of floats +represented in the encoding: +.Bd -literal -offset indent +#define CTF_FP_SINGLE 1 /* IEEE 32-bit float encoding */ +#define CTF_FP_DOUBLE 2 /* IEEE 64-bit float encoding */ +#define CTF_FP_CPLX 3 /* Complex encoding */ +#define CTF_FP_DCPLX 4 /* Double complex encoding */ +#define CTF_FP_LDCPLX 5 /* Long double complex encoding */ +#define CTF_FP_LDOUBLE 6 /* Long double encoding */ +#define CTF_FP_INTRVL 7 /* Interval (2x32-bit) encoding */ +#define CTF_FP_DINTRVL 8 /* Double interval (2x64-bit) encoding */ +#define CTF_FP_LDINTRVL 9 /* Long double interval (2x128-bit) encoding */ +#define CTF_FP_IMAGRY 10 /* Imaginary (32-bit) encoding */ +#define CTF_FP_DIMAGRY 11 /* Long imaginary (64-bit) encoding */ +#define CTF_FP_LDIMAGRY 12 /* Long double imaginary (128-bit) encoding */ +.Ed +.Ss Encoding of Arrays +Arrays, which are of type +.Sy CTF_K_ARRAY , +have no variable length arguments. 
They are followed by a structure which +describes the number of elements in the array, the type identifier of the +elements in the array, and the type identifier of the index of the array. With +arrays, the +.Em ctt_size +member is set to zero. The structure that follows an array is defined as: +.Bd -literal +typedef struct ctf_array { + ushort_t cta_contents; /* reference to type of array contents */ + ushort_t cta_index; /* reference to type of array index */ + uint_t cta_nelems; /* number of elements */ +} ctf_array_t; +.Ed +.Lp +The +.Em cta_contents +and +.Em cta_index +members of the +.Sy ctf_array_t +are type identifiers which are encoded as per the section +.Sx Type Identifiers . +The member +.Em cta_nelems +is a simple four byte unsigned count of the number of elements. This count may +be zero when encountering C99's flexible array members. +.Ss Encoding of Functions +Function types, which are of type +.Sy CTF_K_FUNCTION , +use the variable length list to be the number of arguments in the function. When +the function has a final member which is a varargs, then the argument count is +incremented by one to account for the variable argument. Here, the +.Em ctt_type +member is encoded with the type identifier of the return type of the function. +Note that the +.Em ctt_size +member is not used here. +.Lp +The variable argument list contains the type identifiers for the arguments of +the function, if any. Each one is represented by a +.Sy uint16_t +and encoded according to the +.Sx Type Identifiers +section. If the function's last argument is of type varargs, then it is also +written out, but the type identifier is zero. This is included in the count of +the function's arguments. +.Ss Encoding of Structures and Unions +Structures and Unions, which are encoded with +.Sy CTF_K_STRUCT +and +.Sy CTF_K_UNION +respectively, are very similar constructs in C. 
The main difference +between them is the fact that every member of a structure follows one another, +where as in a union, all members share the same memory. They are also very +similar in terms of their encoding in +.Nm . +The variable length argument for structures and unions represents the number of +members that they have. The value of the member +.Em ctt_size +is the size of the structure and union. There are two different structures which +are used to encode members in the variable list. When the size of a structure or +union is greater than or equal to the large member threshold, 8192, then a +different structure is used to encode the member, all members are encoded using +the same structure. The structure for members is as follows: +.Bd -literal +typedef struct ctf_member { + uint_t ctm_name; /* reference to name in string table */ + ushort_t ctm_type; /* reference to type of member */ + ushort_t ctm_offset; /* offset of this member in bits */ +} ctf_member_t; + +typedef struct ctf_lmember { + uint_t ctlm_name; /* reference to name in string table */ + ushort_t ctlm_type; /* reference to type of member */ + ushort_t ctlm_pad; /* padding */ + uint_t ctlm_offsethi; /* high 32 bits of member offset in bits */ + uint_t ctlm_offsetlo; /* low 32 bits of member offset in bits */ +} ctf_lmember_t; +.Ed +.Lp +Both the +.Em ctm_name +and +.Em ctlm_name +refer to the name of the member. The name is encoded as an offset into the +string table as described by the section +.Sx String Identifiers . +The members +.Sy ctm_type +and +.Sy ctlm_type +both refer to the type of the member. They are encoded as per the section +.Sx Type Identifiers . +.Lp +The last piece of information that is present is the offset which describes the +offset in memory that the member begins at. For unions, this value will always +be zero because the start of unions in memory is always zero. For structures, +this is the offset in +.Sy bits +that the member begins at. 
Note that a compiler may lay out a type with padding. +This means that the difference in offset between two consecutive members may be +larger than the size of the member. When the size of the overall structure is +strictly less than 8192 bytes, the normal structure, +.Sy ctf_member_t , +is used and the offset in bits is stored in the member +.Em ctm_offset . +However, when the size of the structure is greater than or equal to 8192 bytes, +then the number of bits is split into two 32-bit quantities. One member, +.Em ctlm_offsethi , +represents the upper 32 bits of the offset, while the other member, +.Em ctlm_offsetlo , +represents the lower 32 bits of the offset. These can be joined together to get +a 64-bit sized offset in bits by shifting the member +.Em ctlm_offsethi +to the left by thirty two and then doing a binary or of +.Em ctlm_offsetlo . +.Ss Encoding of Enumerations +Enumerations, noted by the type +.Sy CTF_K_ENUM , +are similar to structures. Enumerations use the variable list to note the number +of values that the enumeration contains, which we'll term enumerators. In C, an +enumeration is always equivalent to the intrinsic type +.Sy int , +thus the value of the member +.Em ctt_size +is always the size of an integer which is determined based on the current model. +For illumos systems, this will always be 4, as an integer is always defined to +be 4 bytes large in both +.Sy ILP32 +and +.Sy LP64 , +regardless of the architecture. +.Lp +The enumerators encoded in an enumeration have the following structure in the +variable list: +.Bd -literal +typedef struct ctf_enum { + uint_t cte_name; /* reference to name in string table */ + int cte_value; /* value associated with this name */ +} ctf_enum_t; +.Ed +.Pp +The member +.Em cte_name +refers to the name of the enumerator's value, it is encoded according to the +rules in the section +.Sx String Identifiers . +The member +.Em cte_value +contains the integer value of this enumerator. 
+.Ss Encoding of Forward References +Forward references, types of kind +.Sy CTF_K_FORWARD , +in a +.Nm +file refer to types which may not have a definition at all, only a name. If +the +.Nm +file is a child, then it may be that the forward is resolved to an +actual type in the parent, otherwise the definition may be in another +.Nm +container or may not be known at all. The only member of the +.Sy ctf_type_t +that matters for a forward declaration is the +.Em ctt_name +which points to the name of the forward reference in the string table as +described earlier. There is no other information recorded for forward +references. +.Ss Encoding of Pointers, Typedefs, Volatile, Const, and Restrict +Pointers, typedefs, volatile, const, and restrict are all similar in +.Nm . +They all refer to another type. In the case of typedefs, they provide an +alternate name, while volatile, const, and restrict change how the type is +interpreted in the C programming language. This covers the +.Nm +kinds +.Sy CTF_K_POINTER , +.Sy CTF_K_TYPEDEF , +.Sy CTF_K_VOLATILE , +.Sy CTF_K_RESTRICT , +and +.Sy CTF_K_CONST . +.Lp +These types have no variable list entries and use the member +.Em ctt_type +to refer to the base type that they modify. +.Ss Encoding of Unknown Types +Types with the kind +.Sy CTF_K_UNKNOWN +are used to indicate gaps in the type identifier space. These entries consume an +identifier, but do not define anything. Nothing should refer to these gap +identifiers. +.Ss Dependencies Between Types +C types can be imagined as a directed, cyclic, graph. Structures and unions may +refer to each other in a way that creates a cyclic dependency. In cases such as +these, the entire type section must be read in and processed. Consumers must +not assume that every type can be laid out in dependency order; they +cannot. +.Ss The String Section +The last section of the +.Nm +file is the +.Sy string +section. This section encodes all of the strings that appear throughout +the other sections. 
It is laid out as a series of characters followed by +a null terminator. Generally, all names are written out in ASCII, as +most C compilers do not allow any characters to appear in identifiers +outside of a subset of ASCII. However, any extended character sets +should be written out as a series of UTF-8 bytes. +.Lp +The first entry in the section, at offset zero, is a single null +terminator to reference the empty string. Following that, each C string +should be written out, including the null terminator. Offsets that refer +to something in this section should refer to the first byte which begins +a string. Beyond the first byte in the section being the null +terminator, the order of strings is unimportant. +.Ss Data Encoding and ELF Considerations +.Nm +data is generally included in ELF objects which specify information to +identify the architecture and endianness of the file. A +.Nm +container inside such an object must match the endianness of the ELF +object. Aside from the question of the endian encoding of data, there +should be no other differences between architectures. While many of the +types in this document refer to non-fixed size C integral types, they +are equivalent in the models +.Sy ILP32 +and +.Sy LP64 . +If any other model is being used with +.Nm +data that has different sizes, then it must not use the model's sizes for +those integral types and instead use the fixed size equivalents based on an +.Sy ILP32 +environment. +.Lp +When placing a +.Nm +container inside of an ELF object, there are certain conventions that are +expected for the purposes of tooling being able to find the +.Nm +data. In particular, a given ELF object should only contain a single +.Nm +section. Multiple containers should be merged together into a single +one. +.Lp +The +.Nm +file should be included in its own ELF section. The section's name +must be +.Ql .SUNW_ctf . +The type of the section must be +.Sy SHT_PROGBITS .
+The section should have a link set to the symbol table and its address +alignment must be 4. +.Sh SEE ALSO +.Xr dtrace 1 , +.Xr elf 3 , +.Xr gelf 3 , +.Xr a.out 5 , +.Xr elf 5 diff --git a/cddl/contrib/opensolaris/lib/libctf/common/ctf_lib.c b/cddl/contrib/opensolaris/lib/libctf/common/ctf_lib.c new file mode 100644 index 0000000..5822267 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libctf/common/ctf_lib.c @@ -0,0 +1,527 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/zmod.h> +#include <ctf_impl.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#if defined(sun) +#include <dlfcn.h> +#else +#include <zlib.h> +#endif +#include <gelf.h> + +#if defined(sun) +#ifdef _LP64 +static const char *_libctf_zlib = "/usr/lib/64/libz.so"; +#else +static const char *_libctf_zlib = "/usr/lib/libz.so"; +#endif +#endif + +static struct { + int (*z_uncompress)(uchar_t *, ulong_t *, const uchar_t *, ulong_t); + const char *(*z_error)(int); + void *z_dlp; +} zlib; + +static size_t _PAGESIZE; +static size_t _PAGEMASK; + +#if defined(sun) +#pragma init(_libctf_init) +#else +void _libctf_init(void) __attribute__ ((constructor)); +#endif +void +_libctf_init(void) +{ +#if defined(sun) + const char *p = getenv("LIBCTF_DECOMPRESSOR"); + + if (p != NULL) + _libctf_zlib = p; /* use alternate decompression library */ +#endif + + _libctf_debug = getenv("LIBCTF_DEBUG") != NULL; + + _PAGESIZE = getpagesize(); + _PAGEMASK = ~(_PAGESIZE - 1); +} + +/* + * Attempt to dlopen the decompression library and locate the symbols of + * interest that we will need to call. This information is cached so + * that multiple calls to ctf_bufopen() do not need to reopen the library.
+ */ +void * +ctf_zopen(int *errp) +{ +#if defined(sun) + ctf_dprintf("decompressing CTF data using %s\n", _libctf_zlib); + + if (zlib.z_dlp != NULL) + return (zlib.z_dlp); /* library is already loaded */ + + if (access(_libctf_zlib, R_OK) == -1) + return (ctf_set_open_errno(errp, ECTF_ZMISSING)); + + if ((zlib.z_dlp = dlopen(_libctf_zlib, RTLD_LAZY | RTLD_LOCAL)) == NULL) + return (ctf_set_open_errno(errp, ECTF_ZINIT)); + + zlib.z_uncompress = (int (*)(uchar_t *, ulong_t *, const uchar_t *, ulong_t)) dlsym(zlib.z_dlp, "uncompress"); + zlib.z_error = (const char *(*)(int)) dlsym(zlib.z_dlp, "zError"); + + if (zlib.z_uncompress == NULL || zlib.z_error == NULL) { + (void) dlclose(zlib.z_dlp); + bzero(&zlib, sizeof (zlib)); + return (ctf_set_open_errno(errp, ECTF_ZINIT)); + } +#else + zlib.z_uncompress = uncompress; + zlib.z_error = zError; + + /* Dummy return variable as 'no error' */ + zlib.z_dlp = (void *) (uintptr_t) 1; +#endif + + return (zlib.z_dlp); +} + +/* + * The ctf_bufopen() routine calls these subroutines, defined by <sys/zmod.h>, + * which we then patch through to the functions in the decompression library. + */ +int +z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + return (zlib.z_uncompress(dst, (ulong_t *)dstlen, src, srclen)); +} + +const char * +z_strerror(int err) +{ + return (zlib.z_error(err)); +} + +/* + * Convert a 32-bit ELF file header into GElf. 
+ */ +static void +ehdr_to_gelf(const Elf32_Ehdr *src, GElf_Ehdr *dst) +{ + bcopy(src->e_ident, dst->e_ident, EI_NIDENT); + dst->e_type = src->e_type; + dst->e_machine = src->e_machine; + dst->e_version = src->e_version; + dst->e_entry = (Elf64_Addr)src->e_entry; + dst->e_phoff = (Elf64_Off)src->e_phoff; + dst->e_shoff = (Elf64_Off)src->e_shoff; + dst->e_flags = src->e_flags; + dst->e_ehsize = src->e_ehsize; + dst->e_phentsize = src->e_phentsize; + dst->e_phnum = src->e_phnum; + dst->e_shentsize = src->e_shentsize; + dst->e_shnum = src->e_shnum; + dst->e_shstrndx = src->e_shstrndx; +} + +/* + * Convert a 32-bit ELF section header into GElf. + */ +static void +shdr_to_gelf(const Elf32_Shdr *src, GElf_Shdr *dst) +{ + dst->sh_name = src->sh_name; + dst->sh_type = src->sh_type; + dst->sh_flags = src->sh_flags; + dst->sh_addr = src->sh_addr; + dst->sh_offset = src->sh_offset; + dst->sh_size = src->sh_size; + dst->sh_link = src->sh_link; + dst->sh_info = src->sh_info; + dst->sh_addralign = src->sh_addralign; + dst->sh_entsize = src->sh_entsize; +} + +/* + * In order to mmap a section from the ELF file, we must round down sh_offset + * to the previous page boundary, and mmap the surrounding page. We store + * the pointer to the start of the actual section data back into sp->cts_data. + */ +const void * +ctf_sect_mmap(ctf_sect_t *sp, int fd) +{ + size_t pageoff = sp->cts_offset & ~_PAGEMASK; + + caddr_t base = mmap64(NULL, sp->cts_size + pageoff, PROT_READ, + MAP_PRIVATE, fd, sp->cts_offset & _PAGEMASK); + + if (base != MAP_FAILED) + sp->cts_data = base + pageoff; + + return (base); +} + +/* + * Since sp->cts_data has the adjusted offset, we have to again round down + * to get the actual mmap address and round up to get the size. 
+ */ +void +ctf_sect_munmap(const ctf_sect_t *sp) +{ + uintptr_t addr = (uintptr_t)sp->cts_data; + uintptr_t pageoff = addr & ~_PAGEMASK; + + (void) munmap((void *)(addr - pageoff), sp->cts_size + pageoff); +} + +/* + * Open the specified file descriptor and return a pointer to a CTF container. + * The file can be either an ELF file or raw CTF file. The caller is + * responsible for closing the file descriptor when it is no longer needed. + */ +ctf_file_t * +ctf_fdopen(int fd, int *errp) +{ + ctf_sect_t ctfsect, symsect, strsect; + ctf_file_t *fp = NULL; + size_t shstrndx, shnum; + + struct stat64 st; + ssize_t nbytes; + + union { + ctf_preamble_t ctf; + Elf32_Ehdr e32; + GElf_Ehdr e64; + } hdr; + + bzero(&ctfsect, sizeof (ctf_sect_t)); + bzero(&symsect, sizeof (ctf_sect_t)); + bzero(&strsect, sizeof (ctf_sect_t)); + bzero(&hdr.ctf, sizeof (hdr)); + + if (fstat64(fd, &st) == -1) + return (ctf_set_open_errno(errp, errno)); + + if ((nbytes = pread64(fd, &hdr.ctf, sizeof (hdr), 0)) <= 0) + return (ctf_set_open_errno(errp, nbytes < 0? errno : ECTF_FMT)); + + /* + * If we have read enough bytes to form a CTF header and the magic + * string matches, attempt to interpret the file as raw CTF. 
+ */ + if (nbytes >= (ssize_t) sizeof (ctf_preamble_t) && + hdr.ctf.ctp_magic == CTF_MAGIC) { + if (hdr.ctf.ctp_version > CTF_VERSION) + return (ctf_set_open_errno(errp, ECTF_CTFVERS)); + + ctfsect.cts_data = mmap64(NULL, st.st_size, PROT_READ, + MAP_PRIVATE, fd, 0); + + if (ctfsect.cts_data == MAP_FAILED) + return (ctf_set_open_errno(errp, errno)); + + ctfsect.cts_name = _CTF_SECTION; + ctfsect.cts_type = SHT_PROGBITS; + ctfsect.cts_flags = SHF_ALLOC; + ctfsect.cts_size = (size_t)st.st_size; + ctfsect.cts_entsize = 1; + ctfsect.cts_offset = 0; + + if ((fp = ctf_bufopen(&ctfsect, NULL, NULL, errp)) == NULL) + ctf_sect_munmap(&ctfsect); + + return (fp); + } + + /* + * If we have read enough bytes to form an ELF header and the magic + * string matches, attempt to interpret the file as an ELF file. We + * do our own largefile ELF processing, and convert everything to + * GElf structures so that clients can operate on any data model. + */ + if (nbytes >= (ssize_t) sizeof (Elf32_Ehdr) && + bcmp(&hdr.e32.e_ident[EI_MAG0], ELFMAG, SELFMAG) == 0) { +#if BYTE_ORDER == _BIG_ENDIAN + uchar_t order = ELFDATA2MSB; +#else + uchar_t order = ELFDATA2LSB; +#endif + GElf_Shdr *sp; + + void *strs_map; + size_t strs_mapsz, i; + char *strs; + + if (hdr.e32.e_ident[EI_DATA] != order) + return (ctf_set_open_errno(errp, ECTF_ENDIAN)); + if (hdr.e32.e_version != EV_CURRENT) + return (ctf_set_open_errno(errp, ECTF_ELFVERS)); + + if (hdr.e32.e_ident[EI_CLASS] == ELFCLASS64) { + if (nbytes < (ssize_t) sizeof (GElf_Ehdr)) + return (ctf_set_open_errno(errp, ECTF_FMT)); + } else { + Elf32_Ehdr e32 = hdr.e32; + ehdr_to_gelf(&e32, &hdr.e64); + } + + shnum = hdr.e64.e_shnum; + shstrndx = hdr.e64.e_shstrndx; + + /* Extended ELF sections */ + if ((shstrndx == SHN_XINDEX) || (shnum == 0)) { + if (hdr.e32.e_ident[EI_CLASS] == ELFCLASS32) { + Elf32_Shdr x32; + + if (pread64(fd, &x32, sizeof (x32), + hdr.e64.e_shoff) != sizeof (x32)) + return (ctf_set_open_errno(errp, + errno)); + + shnum = x32.sh_size; 
+ shstrndx = x32.sh_link; + } else { + Elf64_Shdr x64; + + if (pread64(fd, &x64, sizeof (x64), + hdr.e64.e_shoff) != sizeof (x64)) + return (ctf_set_open_errno(errp, + errno)); + + shnum = x64.sh_size; + shstrndx = x64.sh_link; + } + } + + if (shstrndx >= shnum) + return (ctf_set_open_errno(errp, ECTF_CORRUPT)); + + nbytes = sizeof (GElf_Shdr) * shnum; + + if ((sp = malloc(nbytes)) == NULL) + return (ctf_set_open_errno(errp, errno)); + + /* + * Read in and convert to GElf the array of Shdr structures + * from e_shoff so we can locate sections of interest. + */ + if (hdr.e32.e_ident[EI_CLASS] == ELFCLASS32) { + Elf32_Shdr *sp32; + + nbytes = sizeof (Elf32_Shdr) * shnum; + + if ((sp32 = malloc(nbytes)) == NULL || pread64(fd, + sp32, nbytes, hdr.e64.e_shoff) != nbytes) { + free(sp); + return (ctf_set_open_errno(errp, errno)); + } + + for (i = 0; i < shnum; i++) + shdr_to_gelf(&sp32[i], &sp[i]); + + free(sp32); + + } else if (pread64(fd, sp, nbytes, hdr.e64.e_shoff) != nbytes) { + free(sp); + return (ctf_set_open_errno(errp, errno)); + } + + /* + * Now mmap the section header strings section so that we can + * perform string comparison on the section names. + */ + strs_mapsz = sp[shstrndx].sh_size + + (sp[shstrndx].sh_offset & ~_PAGEMASK); + + strs_map = mmap64(NULL, strs_mapsz, PROT_READ, MAP_PRIVATE, + fd, sp[shstrndx].sh_offset & _PAGEMASK); + + strs = (char *)strs_map + + (sp[shstrndx].sh_offset & ~_PAGEMASK); + + if (strs_map == MAP_FAILED) { + free(sp); + return (ctf_set_open_errno(errp, ECTF_MMAP)); + } + + /* + * Iterate over the section header array looking for the CTF + * section and symbol table. The strtab is linked to symtab. 
+ */ + for (i = 0; i < shnum; i++) { + const GElf_Shdr *shp = &sp[i]; + const GElf_Shdr *lhp = &sp[shp->sh_link]; + + if (shp->sh_link >= shnum) + continue; /* corrupt sh_link field */ + + if (shp->sh_name >= sp[shstrndx].sh_size || + lhp->sh_name >= sp[shstrndx].sh_size) + continue; /* corrupt sh_name field */ + + if (shp->sh_type == SHT_PROGBITS && + strcmp(strs + shp->sh_name, _CTF_SECTION) == 0) { + ctfsect.cts_name = strs + shp->sh_name; + ctfsect.cts_type = shp->sh_type; + ctfsect.cts_flags = shp->sh_flags; + ctfsect.cts_size = shp->sh_size; + ctfsect.cts_entsize = shp->sh_entsize; + ctfsect.cts_offset = (off64_t)shp->sh_offset; + + } else if (shp->sh_type == SHT_SYMTAB) { + symsect.cts_name = strs + shp->sh_name; + symsect.cts_type = shp->sh_type; + symsect.cts_flags = shp->sh_flags; + symsect.cts_size = shp->sh_size; + symsect.cts_entsize = shp->sh_entsize; + symsect.cts_offset = (off64_t)shp->sh_offset; + + strsect.cts_name = strs + lhp->sh_name; + strsect.cts_type = lhp->sh_type; + strsect.cts_flags = lhp->sh_flags; + strsect.cts_size = lhp->sh_size; + strsect.cts_entsize = lhp->sh_entsize; + strsect.cts_offset = (off64_t)lhp->sh_offset; + } + } + + free(sp); /* free section header array */ + + if (ctfsect.cts_type == SHT_NULL) { + (void) munmap(strs_map, strs_mapsz); + return (ctf_set_open_errno(errp, ECTF_NOCTFDATA)); + } + + /* + * Now mmap the CTF data, symtab, and strtab sections and + * call ctf_bufopen() to do the rest of the work. 
+ */ + if (ctf_sect_mmap(&ctfsect, fd) == MAP_FAILED) { + (void) munmap(strs_map, strs_mapsz); + return (ctf_set_open_errno(errp, ECTF_MMAP)); + } + + if (symsect.cts_type != SHT_NULL && + strsect.cts_type != SHT_NULL) { + if (ctf_sect_mmap(&symsect, fd) == MAP_FAILED || + ctf_sect_mmap(&strsect, fd) == MAP_FAILED) { + (void) ctf_set_open_errno(errp, ECTF_MMAP); + goto bad; /* unmap all and abort */ + } + fp = ctf_bufopen(&ctfsect, &symsect, &strsect, errp); + } else + fp = ctf_bufopen(&ctfsect, NULL, NULL, errp); +bad: + if (fp == NULL) { + ctf_sect_munmap(&ctfsect); + ctf_sect_munmap(&symsect); + ctf_sect_munmap(&strsect); + } else + fp->ctf_flags |= LCTF_MMAP; + + (void) munmap(strs_map, strs_mapsz); + return (fp); + } + + return (ctf_set_open_errno(errp, ECTF_FMT)); +} + +/* + * Open the specified file and return a pointer to a CTF container. The file + * can be either an ELF file or raw CTF file. This is just a convenient + * wrapper around ctf_fdopen() for callers. + */ +ctf_file_t * +ctf_open(const char *filename, int *errp) +{ + ctf_file_t *fp; + int fd; + + if ((fd = open64(filename, O_RDONLY)) == -1) { + if (errp != NULL) + *errp = errno; + return (NULL); + } + + fp = ctf_fdopen(fd, errp); + (void) close(fd); + return (fp); +} + +/* + * Write the uncompressed CTF data stream to the specified file descriptor. + * This is useful for saving the results of dynamic CTF containers. + */ +int +ctf_write(ctf_file_t *fp, int fd) +{ + const uchar_t *buf = fp->ctf_base; + ssize_t resid = fp->ctf_size; + ssize_t len; + + while (resid != 0) { + if ((len = write(fd, buf, resid)) <= 0) + return (ctf_set_errno(fp, errno)); + resid -= len; + buf += len; + } + + return (0); +} + +/* + * Set the CTF library client version to the specified version. If version is + * zero, we just return the default library version number. 
+ */ +int +ctf_version(int version) +{ + if (version < 0) { + errno = EINVAL; + return (-1); + } + + if (version > 0) { + if (version > CTF_VERSION) { + errno = ENOTSUP; + return (-1); + } + ctf_dprintf("ctf_version: client using version %d\n", version); + _libctf_version = version; + } + + return (_libctf_version); +} diff --git a/cddl/contrib/opensolaris/lib/libctf/common/ctf_subr.c b/cddl/contrib/opensolaris/lib/libctf/common/ctf_subr.c new file mode 100644 index 0000000..e9f5ad7 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libctf/common/ctf_subr.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <ctf_impl.h> +#include <sys/mman.h> +#include <stdarg.h> + +void * +ctf_data_alloc(size_t size) +{ + return (mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0)); +} + +void +ctf_data_free(void *buf, size_t size) +{ + (void) munmap(buf, size); +} + +void +ctf_data_protect(void *buf, size_t size) +{ + (void) mprotect(buf, size, PROT_READ); +} + +void * +ctf_alloc(size_t size) +{ + return (malloc(size)); +} + +/*ARGSUSED*/ +void +ctf_free(void *buf, __unused size_t size) +{ + free(buf); +} + +const char * +ctf_strerror(int err) +{ + return ((const char *) strerror(err)); +} + +/*PRINTFLIKE1*/ +void +ctf_dprintf(const char *format, ...) +{ + if (_libctf_debug) { + va_list alist; + + va_start(alist, format); + (void) fputs("libctf DEBUG: ", stderr); + (void) vfprintf(stderr, format, alist); + va_end(alist); + } +} diff --git a/cddl/contrib/opensolaris/lib/libctf/common/libctf.h b/cddl/contrib/opensolaris/lib/libctf/common/libctf.h new file mode 100644 index 0000000..3fd6931 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libctf/common/libctf.h @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * This header file defines the interfaces available from the CTF debugger + * library, libctf. This library provides functions that a debugger can + * use to operate on data in the Compact ANSI-C Type Format (CTF). This + * is NOT a public interface, although it may eventually become one in + * the fullness of time after we gain more experience with the interfaces. + * + * In the meantime, be aware that any program linked with libctf in this + * release of Solaris is almost guaranteed to break in the next release. + * + * In short, do not use this header file or libctf for any purpose. + */ + +#ifndef _LIBCTF_H +#define _LIBCTF_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ctf_api.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This flag can be used to enable debug messages. + */ +extern int _libctf_debug; + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBCTF_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c b/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c new file mode 100644 index 0000000..ccd4f9b --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/drti.c @@ -0,0 +1,275 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2013 Voxer Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <unistd.h> +#include <fcntl.h> +#include <dlfcn.h> +#include <link.h> +#include <sys/dtrace.h> + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <libelf.h> +#include <gelf.h> + +/* + * In Solaris 10 GA, the only mechanism for communicating helper information + * is through the DTrace helper pseudo-device node in /devices; there is + * no /dev link. Because of this, USDT providers and helper actions don't + * work inside of non-global zones. This issue was addressed by adding + * the /dev and having this initialization code use that /dev link. If the + * /dev link doesn't exist it falls back to looking for the /devices node + * as this code may be embedded in a binary which runs on Solaris 10 GA. 
+ * + * Users may set the following environment variable to affect the way + * helper initialization takes place: + * + * DTRACE_DOF_INIT_DEBUG enable debugging output + * DTRACE_DOF_INIT_DISABLE disable helper loading + * DTRACE_DOF_INIT_DEVNAME set the path to the helper node + */ + +static const char *devnamep = "/dev/dtrace/helper"; +#if defined(sun) +static const char *olddevname = "/devices/pseudo/dtrace@0:helper"; +#endif + +static const char *modname; /* Name of this load object */ +static int gen; /* DOF helper generation */ +#if defined(sun) +extern dof_hdr_t __SUNW_dof; /* DOF defined in the .SUNW_dof section */ +#endif +static boolean_t dof_init_debug = B_FALSE; /* From DTRACE_DOF_INIT_DEBUG */ + +static void +dprintf(int debug, const char *fmt, ...) +{ + va_list ap; + + if (debug && !dof_init_debug) + return; + + va_start(ap, fmt); + + if (modname == NULL) + (void) fprintf(stderr, "dtrace DOF: "); + else + (void) fprintf(stderr, "dtrace DOF %s: ", modname); + + (void) vfprintf(stderr, fmt, ap); + + if (fmt[strlen(fmt) - 1] != '\n') + (void) fprintf(stderr, ": %s\n", strerror(errno)); + + va_end(ap); +} + +#if defined(sun) +#pragma init(dtrace_dof_init) +#else +static void dtrace_dof_init(void) __attribute__ ((constructor)); +#endif + +static void +dtrace_dof_init(void) +{ +#if defined(sun) + dof_hdr_t *dof = &__SUNW_dof; +#else + dof_hdr_t *dof = NULL; +#endif +#ifdef _LP64 + Elf64_Ehdr *elf; +#else + Elf32_Ehdr *elf; +#endif + dof_helper_t dh; + Link_map *lmp; +#if defined(sun) + Lmid_t lmid; +#else + u_long lmid = 0; +#endif + int fd; + const char *p; +#if !defined(sun) + Elf *e; + Elf_Scn *scn = NULL; + Elf_Data *dofdata = NULL; + dof_hdr_t *dof_next = NULL; + GElf_Shdr shdr; + int efd; + char *s; + size_t shstridx; +#endif + + if (getenv("DTRACE_DOF_INIT_DISABLE") != NULL) + return; + + if (getenv("DTRACE_DOF_INIT_DEBUG") != NULL) + dof_init_debug = B_TRUE; + + if (dlinfo(RTLD_SELF, RTLD_DI_LINKMAP, &lmp) == -1 || lmp == NULL) { + dprintf(1, 
"couldn't discover module name or address\n"); + return; + } + +#if defined(sun) + if (dlinfo(RTLD_SELF, RTLD_DI_LMID, &lmid) == -1) { + dprintf(1, "couldn't discover link map ID\n"); + return; + } +#endif + + if ((modname = strrchr(lmp->l_name, '/')) == NULL) + modname = lmp->l_name; + else + modname++; +#if !defined(sun) + elf_version(EV_CURRENT); + if ((efd = open(lmp->l_name, O_RDONLY, 0)) < 0) { + dprintf(1, "couldn't open file for reading\n"); + return; + } + if ((e = elf_begin(efd, ELF_C_READ, NULL)) == NULL) { + dprintf(1, "elf_begin failed\n"); + close(efd); + return; + } + elf_getshdrstrndx(e, &shstridx); + dof = NULL; + while ((scn = elf_nextscn(e, scn)) != NULL) { + gelf_getshdr(scn, &shdr); + if (shdr.sh_type == SHT_SUNW_dof) { + s = elf_strptr(e, shstridx, shdr.sh_name); + if (s != NULL && strcmp(s, ".SUNW_dof") == 0) { + dofdata = elf_getdata(scn, NULL); + dof = dofdata->d_buf; + } + } + } + if (dof == NULL) { + dprintf(1, "SUNW_dof section not found\n"); + elf_end(e); + close(efd); + return; + } + + while ((char *) dof < (char *) dofdata->d_buf + dofdata->d_size) { + dof_next = (void *) ((char *) dof + dof->dofh_filesz); +#endif + + if (dof->dofh_ident[DOF_ID_MAG0] != DOF_MAG_MAG0 || + dof->dofh_ident[DOF_ID_MAG1] != DOF_MAG_MAG1 || + dof->dofh_ident[DOF_ID_MAG2] != DOF_MAG_MAG2 || + dof->dofh_ident[DOF_ID_MAG3] != DOF_MAG_MAG3) { + dprintf(0, ".SUNW_dof section corrupt\n"); + return; + } + + elf = (void *)lmp->l_addr; + + dh.dofhp_dof = (uintptr_t)dof; + dh.dofhp_addr = elf->e_type == ET_DYN ? 
(uintptr_t) lmp->l_addr : 0; + + if (lmid == 0) { + (void) snprintf(dh.dofhp_mod, sizeof (dh.dofhp_mod), + "%s", modname); + } else { + (void) snprintf(dh.dofhp_mod, sizeof (dh.dofhp_mod), + "LM%lu`%s", lmid, modname); + } + + if ((p = getenv("DTRACE_DOF_INIT_DEVNAME")) != NULL) + devnamep = p; + + if ((fd = open64(devnamep, O_RDWR)) < 0) { + dprintf(1, "failed to open helper device %s", devnamep); +#if defined(sun) + /* + * If the device path wasn't explicitly set, try again with + * the old device path. + */ + if (p != NULL) + return; + + devnamep = olddevname; + + if ((fd = open64(devnamep, O_RDWR)) < 0) { + dprintf(1, "failed to open helper device %s", devnamep); + return; + } +#else + return; +#endif + } + if ((gen = ioctl(fd, DTRACEHIOC_ADDDOF, &dh)) == -1) + dprintf(1, "DTrace ioctl failed for DOF at %p", dof); + else { + dprintf(1, "DTrace ioctl succeeded for DOF at %p\n", dof); +#if !defined(sun) + gen = dh.gen; +#endif + } + + (void) close(fd); + +#if !defined(sun) + /* End of while loop */ + dof = dof_next; + } + + elf_end(e); + (void) close(efd); +#endif +} + +#if defined(sun) +#pragma fini(dtrace_dof_fini) +#else +static void dtrace_dof_fini(void) __attribute__ ((destructor)); +#endif + +static void +dtrace_dof_fini(void) +{ + int fd; + + if ((fd = open64(devnamep, O_RDWR)) < 0) { + dprintf(1, "failed to open helper device %s", devnamep); + return; + } + + if ((gen = ioctl(fd, DTRACEHIOC_REMOVE, &gen)) == -1) + dprintf(1, "DTrace ioctl failed to remove DOF (%d)\n", gen); + else + dprintf(1, "DTrace ioctl removed DOF (%d)\n", gen); + + (void) close(fd); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_aggregate.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_aggregate.c new file mode 100644 index 0000000..6b571fa --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_aggregate.c @@ -0,0 +1,2198 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <unistd.h> +#include <dt_impl.h> +#include <assert.h> +#if defined(sun) +#include <alloca.h> +#else +#include <sys/sysctl.h> +#include <libproc_compat.h> +#endif +#include <limits.h> + +#define DTRACE_AHASHSIZE 32779 /* big 'ol prime */ + +/* + * Because qsort(3C) does not allow an argument to be passed to a comparison + * function, the variables that affect comparison must regrettably be global; + * they are protected by a global static lock, dt_qsort_lock. + */ +static pthread_mutex_t dt_qsort_lock = PTHREAD_MUTEX_INITIALIZER; + +static int dt_revsort; +static int dt_keysort; +static int dt_keypos; + +#define DT_LESSTHAN (dt_revsort == 0 ? -1 : 1) +#define DT_GREATERTHAN (dt_revsort == 0 ? 
1 : -1) + +static void +dt_aggregate_count(int64_t *existing, int64_t *new, size_t size) +{ + uint_t i; + + for (i = 0; i < size / sizeof (int64_t); i++) + existing[i] = existing[i] + new[i]; +} + +static int +dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs) +{ + int64_t lvar = *lhs; + int64_t rvar = *rhs; + + if (lvar < rvar) + return (DT_LESSTHAN); + + if (lvar > rvar) + return (DT_GREATERTHAN); + + return (0); +} + +/*ARGSUSED*/ +static void +dt_aggregate_min(int64_t *existing, int64_t *new, size_t size) +{ + if (*new < *existing) + *existing = *new; +} + +/*ARGSUSED*/ +static void +dt_aggregate_max(int64_t *existing, int64_t *new, size_t size) +{ + if (*new > *existing) + *existing = *new; +} + +static int +dt_aggregate_averagecmp(int64_t *lhs, int64_t *rhs) +{ + int64_t lavg = lhs[0] ? (lhs[1] / lhs[0]) : 0; + int64_t ravg = rhs[0] ? (rhs[1] / rhs[0]) : 0; + + if (lavg < ravg) + return (DT_LESSTHAN); + + if (lavg > ravg) + return (DT_GREATERTHAN); + + return (0); +} + +static int +dt_aggregate_stddevcmp(int64_t *lhs, int64_t *rhs) +{ + uint64_t lsd = dt_stddev((uint64_t *)lhs, 1); + uint64_t rsd = dt_stddev((uint64_t *)rhs, 1); + + if (lsd < rsd) + return (DT_LESSTHAN); + + if (lsd > rsd) + return (DT_GREATERTHAN); + + return (0); +} + +/*ARGSUSED*/ +static void +dt_aggregate_lquantize(int64_t *existing, int64_t *new, size_t size) +{ + int64_t arg = *existing++; + uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); + int i; + + for (i = 0; i <= levels + 1; i++) + existing[i] = existing[i] + new[i + 1]; +} + +static long double +dt_aggregate_lquantizedsum(int64_t *lquanta) +{ + int64_t arg = *lquanta++; + int32_t base = DTRACE_LQUANTIZE_BASE(arg); + uint16_t step = DTRACE_LQUANTIZE_STEP(arg); + uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i; + long double total = (long double)lquanta[0] * (long double)(base - 1); + + for (i = 0; i < levels; base += step, i++) + total += (long double)lquanta[i + 1] * (long double)base; + + return (total + (long 
double)lquanta[levels + 1] * + (long double)(base + 1)); +} + +static int64_t +dt_aggregate_lquantizedzero(int64_t *lquanta) +{ + int64_t arg = *lquanta++; + int32_t base = DTRACE_LQUANTIZE_BASE(arg); + uint16_t step = DTRACE_LQUANTIZE_STEP(arg); + uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i; + + if (base - 1 == 0) + return (lquanta[0]); + + for (i = 0; i < levels; base += step, i++) { + if (base != 0) + continue; + + return (lquanta[i + 1]); + } + + if (base + 1 == 0) + return (lquanta[levels + 1]); + + return (0); +} + +static int +dt_aggregate_lquantizedcmp(int64_t *lhs, int64_t *rhs) +{ + long double lsum = dt_aggregate_lquantizedsum(lhs); + long double rsum = dt_aggregate_lquantizedsum(rhs); + int64_t lzero, rzero; + + if (lsum < rsum) + return (DT_LESSTHAN); + + if (lsum > rsum) + return (DT_GREATERTHAN); + + /* + * If they're both equal, then we will compare based on the weights at + * zero. If the weights at zero are equal (or if zero is not within + * the range of the linear quantization), then this will be judged a + * tie and will be resolved based on the key comparison. 
+ */ + lzero = dt_aggregate_lquantizedzero(lhs); + rzero = dt_aggregate_lquantizedzero(rhs); + + if (lzero < rzero) + return (DT_LESSTHAN); + + if (lzero > rzero) + return (DT_GREATERTHAN); + + return (0); +} + +static void +dt_aggregate_llquantize(int64_t *existing, int64_t *new, size_t size) +{ + int i; + + for (i = 1; i < size / sizeof (int64_t); i++) + existing[i] = existing[i] + new[i]; +} + +static long double +dt_aggregate_llquantizedsum(int64_t *llquanta) +{ + int64_t arg = *llquanta++; + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + int bin = 0, order; + int64_t value = 1, next, step; + long double total; + + assert(nsteps >= factor); + assert(nsteps % factor == 0); + + for (order = 0; order < low; order++) + value *= factor; + + total = (long double)llquanta[bin++] * (long double)(value - 1); + + next = value * factor; + step = next > nsteps ? next / nsteps : 1; + + while (order <= high) { + assert(value < next); + total += (long double)llquanta[bin++] * (long double)(value); + + if ((value += step) != next) + continue; + + next = value * factor; + step = next > nsteps ? next / nsteps : 1; + order++; + } + + return (total + (long double)llquanta[bin] * (long double)value); +} + +static int +dt_aggregate_llquantizedcmp(int64_t *lhs, int64_t *rhs) +{ + long double lsum = dt_aggregate_llquantizedsum(lhs); + long double rsum = dt_aggregate_llquantizedsum(rhs); + int64_t lzero, rzero; + + if (lsum < rsum) + return (DT_LESSTHAN); + + if (lsum > rsum) + return (DT_GREATERTHAN); + + /* + * If they're both equal, then we will compare based on the weights at + * zero. If the weights at zero are equal, then this will be judged a + * tie and will be resolved based on the key comparison. 
+ */ + lzero = lhs[1]; + rzero = rhs[1]; + + if (lzero < rzero) + return (DT_LESSTHAN); + + if (lzero > rzero) + return (DT_GREATERTHAN); + + return (0); +} + +static int +dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs) +{ + int nbuckets = DTRACE_QUANTIZE_NBUCKETS; + long double ltotal = 0, rtotal = 0; + int64_t lzero, rzero; + uint_t i; + + for (i = 0; i < nbuckets; i++) { + int64_t bucketval = DTRACE_QUANTIZE_BUCKETVAL(i); + + if (bucketval == 0) { + lzero = lhs[i]; + rzero = rhs[i]; + } + + ltotal += (long double)bucketval * (long double)lhs[i]; + rtotal += (long double)bucketval * (long double)rhs[i]; + } + + if (ltotal < rtotal) + return (DT_LESSTHAN); + + if (ltotal > rtotal) + return (DT_GREATERTHAN); + + /* + * If they're both equal, then we will compare based on the weights at + * zero. If the weights at zero are equal, then this will be judged a + * tie and will be resolved based on the key comparison. + */ + if (lzero < rzero) + return (DT_LESSTHAN); + + if (lzero > rzero) + return (DT_GREATERTHAN); + + return (0); +} + +static void +dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data) +{ + uint64_t pid = data[0]; + uint64_t *pc = &data[1]; + struct ps_prochandle *P; + GElf_Sym sym; + + if (dtp->dt_vector != NULL) + return; + + if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL) + return; + + dt_proc_lock(dtp, P); + + if (Plookup_by_addr(P, *pc, NULL, 0, &sym) == 0) + *pc = sym.st_value; + + dt_proc_unlock(dtp, P); + dt_proc_release(dtp, P); +} + +static void +dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data) +{ + uint64_t pid = data[0]; + uint64_t *pc = &data[1]; + struct ps_prochandle *P; + const prmap_t *map; + + if (dtp->dt_vector != NULL) + return; + + if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL) + return; + + dt_proc_lock(dtp, P); + + if ((map = Paddr_to_map(P, *pc)) != NULL) + *pc = map->pr_vaddr; + + dt_proc_unlock(dtp, P); + dt_proc_release(dtp, P); +} + +static void 
+dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data) +{ + GElf_Sym sym; + uint64_t *pc = data; + + if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0) + *pc = sym.st_value; +} + +static void +dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *data) +{ + uint64_t *pc = data; + dt_module_t *dmp; + + if (dtp->dt_vector != NULL) { + /* + * We don't have a way of just getting the module for a + * vectored open, and it doesn't seem to be worth defining + * one. This means that use of mod() won't get true + * aggregation in the postmortem case (some modules may + * appear more than once in aggregation output). It seems + * unlikely that anyone will ever notice or care... + */ + return; + } + + for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL; + dmp = dt_list_next(dmp)) { + if (*pc - dmp->dm_text_va < dmp->dm_text_size) { + *pc = dmp->dm_text_va; + return; + } + } +} + +static dtrace_aggvarid_t +dt_aggregate_aggvarid(dt_ahashent_t *ent) +{ + dtrace_aggdesc_t *agg = ent->dtahe_data.dtada_desc; + caddr_t data = ent->dtahe_data.dtada_data; + dtrace_recdesc_t *rec = agg->dtagd_rec; + + /* + * First, we'll check the variable ID in the aggdesc. If it's valid, + * we'll return it. If not, we'll use the compiler-generated ID + * present as the first record. 
+ */ + if (agg->dtagd_varid != DTRACE_AGGVARIDNONE) + return (agg->dtagd_varid); + + agg->dtagd_varid = *((dtrace_aggvarid_t *)(uintptr_t)(data + + rec->dtrd_offset)); + + return (agg->dtagd_varid); +} + + +static int +dt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu) +{ + dtrace_epid_t id; + uint64_t hashval; + size_t offs, roffs, size, ndx; + int i, j, rval; + caddr_t addr, data; + dtrace_recdesc_t *rec; + dt_aggregate_t *agp = &dtp->dt_aggregate; + dtrace_aggdesc_t *agg; + dt_ahash_t *hash = &agp->dtat_hash; + dt_ahashent_t *h; + dtrace_bufdesc_t b = agp->dtat_buf, *buf = &b; + dtrace_aggdata_t *aggdata; + int flags = agp->dtat_flags; + + buf->dtbd_cpu = cpu; + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, buf) == -1) { +#else + if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, &buf) == -1) { +#endif + if (errno == ENOENT) { + /* + * If that failed with ENOENT, it may be because the + * CPU was unconfigured. This is okay; we'll just + * do nothing but return success. + */ + return (0); + } + + return (dt_set_errno(dtp, errno)); + } + + if (buf->dtbd_drops != 0) { + if (dt_handle_cpudrop(dtp, cpu, + DTRACEDROP_AGGREGATION, buf->dtbd_drops) == -1) + return (-1); + } + + if (buf->dtbd_size == 0) + return (0); + + if (hash->dtah_hash == NULL) { + size_t size; + + hash->dtah_size = DTRACE_AHASHSIZE; + size = hash->dtah_size * sizeof (dt_ahashent_t *); + + if ((hash->dtah_hash = malloc(size)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + bzero(hash->dtah_hash, size); + } + + for (offs = 0; offs < buf->dtbd_size; ) { + /* + * We're guaranteed to have an ID. + */ + id = *((dtrace_epid_t *)((uintptr_t)buf->dtbd_data + + (uintptr_t)offs)); + + if (id == DTRACE_AGGIDNONE) { + /* + * This is filler to assure proper alignment of the + * next record; we simply ignore it. 
+ */ + offs += sizeof (id); + continue; + } + + if ((rval = dt_aggid_lookup(dtp, id, &agg)) != 0) + return (rval); + + addr = buf->dtbd_data + offs; + size = agg->dtagd_size; + hashval = 0; + + for (j = 0; j < agg->dtagd_nrecs - 1; j++) { + rec = &agg->dtagd_rec[j]; + roffs = rec->dtrd_offset; + + switch (rec->dtrd_action) { + case DTRACEACT_USYM: + dt_aggregate_usym(dtp, + /* LINTED - alignment */ + (uint64_t *)&addr[roffs]); + break; + + case DTRACEACT_UMOD: + dt_aggregate_umod(dtp, + /* LINTED - alignment */ + (uint64_t *)&addr[roffs]); + break; + + case DTRACEACT_SYM: + /* LINTED - alignment */ + dt_aggregate_sym(dtp, (uint64_t *)&addr[roffs]); + break; + + case DTRACEACT_MOD: + /* LINTED - alignment */ + dt_aggregate_mod(dtp, (uint64_t *)&addr[roffs]); + break; + + default: + break; + } + + for (i = 0; i < rec->dtrd_size; i++) + hashval += addr[roffs + i]; + } + + ndx = hashval % hash->dtah_size; + + for (h = hash->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) { + if (h->dtahe_hashval != hashval) + continue; + + if (h->dtahe_size != size) + continue; + + aggdata = &h->dtahe_data; + data = aggdata->dtada_data; + + for (j = 0; j < agg->dtagd_nrecs - 1; j++) { + rec = &agg->dtagd_rec[j]; + roffs = rec->dtrd_offset; + + for (i = 0; i < rec->dtrd_size; i++) + if (addr[roffs + i] != data[roffs + i]) + goto hashnext; + } + + /* + * We found it. Now we need to apply the aggregating + * action on the data here. + */ + rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; + roffs = rec->dtrd_offset; + /* LINTED - alignment */ + h->dtahe_aggregate((int64_t *)&data[roffs], + /* LINTED - alignment */ + (int64_t *)&addr[roffs], rec->dtrd_size); + + /* + * If we're keeping per CPU data, apply the aggregating + * action there as well. 
+ */ + if (aggdata->dtada_percpu != NULL) { + data = aggdata->dtada_percpu[cpu]; + + /* LINTED - alignment */ + h->dtahe_aggregate((int64_t *)data, + /* LINTED - alignment */ + (int64_t *)&addr[roffs], rec->dtrd_size); + } + + goto bufnext; +hashnext: + continue; + } + + /* + * If we're here, we couldn't find an entry for this record. + */ + if ((h = malloc(sizeof (dt_ahashent_t))) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + bzero(h, sizeof (dt_ahashent_t)); + aggdata = &h->dtahe_data; + + if ((aggdata->dtada_data = malloc(size)) == NULL) { + free(h); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + bcopy(addr, aggdata->dtada_data, size); + aggdata->dtada_size = size; + aggdata->dtada_desc = agg; + aggdata->dtada_handle = dtp; + (void) dt_epid_lookup(dtp, agg->dtagd_epid, + &aggdata->dtada_edesc, &aggdata->dtada_pdesc); + aggdata->dtada_normal = 1; + + h->dtahe_hashval = hashval; + h->dtahe_size = size; + (void) dt_aggregate_aggvarid(h); + + rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; + + if (flags & DTRACE_A_PERCPU) { + int max_cpus = agp->dtat_maxcpu; + caddr_t *percpu = malloc(max_cpus * sizeof (caddr_t)); + + if (percpu == NULL) { + free(aggdata->dtada_data); + free(h); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + for (j = 0; j < max_cpus; j++) { + percpu[j] = malloc(rec->dtrd_size); + + if (percpu[j] == NULL) { + while (--j >= 0) + free(percpu[j]); + + free(aggdata->dtada_data); + free(h); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + if (j == cpu) { + bcopy(&addr[rec->dtrd_offset], + percpu[j], rec->dtrd_size); + } else { + bzero(percpu[j], rec->dtrd_size); + } + } + + aggdata->dtada_percpu = percpu; + } + + switch (rec->dtrd_action) { + case DTRACEAGG_MIN: + h->dtahe_aggregate = dt_aggregate_min; + break; + + case DTRACEAGG_MAX: + h->dtahe_aggregate = dt_aggregate_max; + break; + + case DTRACEAGG_LQUANTIZE: + h->dtahe_aggregate = dt_aggregate_lquantize; + break; + + case DTRACEAGG_LLQUANTIZE: + h->dtahe_aggregate = dt_aggregate_llquantize; + 
break; + + case DTRACEAGG_COUNT: + case DTRACEAGG_SUM: + case DTRACEAGG_AVG: + case DTRACEAGG_STDDEV: + case DTRACEAGG_QUANTIZE: + h->dtahe_aggregate = dt_aggregate_count; + break; + + default: + return (dt_set_errno(dtp, EDT_BADAGG)); + } + + if (hash->dtah_hash[ndx] != NULL) + hash->dtah_hash[ndx]->dtahe_prev = h; + + h->dtahe_next = hash->dtah_hash[ndx]; + hash->dtah_hash[ndx] = h; + + if (hash->dtah_all != NULL) + hash->dtah_all->dtahe_prevall = h; + + h->dtahe_nextall = hash->dtah_all; + hash->dtah_all = h; +bufnext: + offs += agg->dtagd_size; + } + + return (0); +} + +int +dtrace_aggregate_snap(dtrace_hdl_t *dtp) +{ + int i, rval; + dt_aggregate_t *agp = &dtp->dt_aggregate; + hrtime_t now = gethrtime(); + dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_AGGRATE]; + + if (dtp->dt_lastagg != 0) { + if (now - dtp->dt_lastagg < interval) + return (0); + + dtp->dt_lastagg += interval; + } else { + dtp->dt_lastagg = now; + } + + if (!dtp->dt_active) + return (dt_set_errno(dtp, EINVAL)); + + if (agp->dtat_buf.dtbd_size == 0) + return (0); + + for (i = 0; i < agp->dtat_ncpus; i++) { + if ((rval = dt_aggregate_snap_cpu(dtp, agp->dtat_cpus[i]))) + return (rval); + } + + return (0); +} + +static int +dt_aggregate_hashcmp(const void *lhs, const void *rhs) +{ + dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); + dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); + dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; + dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; + + if (lagg->dtagd_nrecs < ragg->dtagd_nrecs) + return (DT_LESSTHAN); + + if (lagg->dtagd_nrecs > ragg->dtagd_nrecs) + return (DT_GREATERTHAN); + + return (0); +} + +static int +dt_aggregate_varcmp(const void *lhs, const void *rhs) +{ + dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); + dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); + dtrace_aggvarid_t lid, rid; + + lid = dt_aggregate_aggvarid(lh); + rid = dt_aggregate_aggvarid(rh); + + if (lid < rid) + return (DT_LESSTHAN); + + if (lid > rid) + return 
(DT_GREATERTHAN); + + return (0); +} + +static int +dt_aggregate_keycmp(const void *lhs, const void *rhs) +{ + dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); + dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); + dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; + dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; + dtrace_recdesc_t *lrec, *rrec; + char *ldata, *rdata; + int rval, i, j, keypos, nrecs; + + if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0) + return (rval); + + nrecs = lagg->dtagd_nrecs - 1; + assert(nrecs == ragg->dtagd_nrecs - 1); + + keypos = dt_keypos + 1 >= nrecs ? 0 : dt_keypos; + + for (i = 1; i < nrecs; i++) { + uint64_t lval, rval; + int ndx = i + keypos; + + if (ndx >= nrecs) + ndx = ndx - nrecs + 1; + + lrec = &lagg->dtagd_rec[ndx]; + rrec = &ragg->dtagd_rec[ndx]; + + ldata = lh->dtahe_data.dtada_data + lrec->dtrd_offset; + rdata = rh->dtahe_data.dtada_data + rrec->dtrd_offset; + + if (lrec->dtrd_size < rrec->dtrd_size) + return (DT_LESSTHAN); + + if (lrec->dtrd_size > rrec->dtrd_size) + return (DT_GREATERTHAN); + + switch (lrec->dtrd_size) { + case sizeof (uint64_t): + /* LINTED - alignment */ + lval = *((uint64_t *)ldata); + /* LINTED - alignment */ + rval = *((uint64_t *)rdata); + break; + + case sizeof (uint32_t): + /* LINTED - alignment */ + lval = *((uint32_t *)ldata); + /* LINTED - alignment */ + rval = *((uint32_t *)rdata); + break; + + case sizeof (uint16_t): + /* LINTED - alignment */ + lval = *((uint16_t *)ldata); + /* LINTED - alignment */ + rval = *((uint16_t *)rdata); + break; + + case sizeof (uint8_t): + lval = *((uint8_t *)ldata); + rval = *((uint8_t *)rdata); + break; + + default: + switch (lrec->dtrd_action) { + case DTRACEACT_UMOD: + case DTRACEACT_UADDR: + case DTRACEACT_USYM: + for (j = 0; j < 2; j++) { + /* LINTED - alignment */ + lval = ((uint64_t *)ldata)[j]; + /* LINTED - alignment */ + rval = ((uint64_t *)rdata)[j]; + + if (lval < rval) + return (DT_LESSTHAN); + + if (lval > rval) + return (DT_GREATERTHAN); + } + + 
break; + + default: + for (j = 0; j < lrec->dtrd_size; j++) { + lval = ((uint8_t *)ldata)[j]; + rval = ((uint8_t *)rdata)[j]; + + if (lval < rval) + return (DT_LESSTHAN); + + if (lval > rval) + return (DT_GREATERTHAN); + } + } + + continue; + } + + if (lval < rval) + return (DT_LESSTHAN); + + if (lval > rval) + return (DT_GREATERTHAN); + } + + return (0); +} + +static int +dt_aggregate_valcmp(const void *lhs, const void *rhs) +{ + dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); + dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); + dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; + dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; + caddr_t ldata = lh->dtahe_data.dtada_data; + caddr_t rdata = rh->dtahe_data.dtada_data; + dtrace_recdesc_t *lrec, *rrec; + int64_t *laddr, *raddr; + int rval; + + assert(lagg->dtagd_nrecs == ragg->dtagd_nrecs); + + lrec = &lagg->dtagd_rec[lagg->dtagd_nrecs - 1]; + rrec = &ragg->dtagd_rec[ragg->dtagd_nrecs - 1]; + + assert(lrec->dtrd_action == rrec->dtrd_action); + + laddr = (int64_t *)(uintptr_t)(ldata + lrec->dtrd_offset); + raddr = (int64_t *)(uintptr_t)(rdata + rrec->dtrd_offset); + + switch (lrec->dtrd_action) { + case DTRACEAGG_AVG: + rval = dt_aggregate_averagecmp(laddr, raddr); + break; + + case DTRACEAGG_STDDEV: + rval = dt_aggregate_stddevcmp(laddr, raddr); + break; + + case DTRACEAGG_QUANTIZE: + rval = dt_aggregate_quantizedcmp(laddr, raddr); + break; + + case DTRACEAGG_LQUANTIZE: + rval = dt_aggregate_lquantizedcmp(laddr, raddr); + break; + + case DTRACEAGG_LLQUANTIZE: + rval = dt_aggregate_llquantizedcmp(laddr, raddr); + break; + + case DTRACEAGG_COUNT: + case DTRACEAGG_SUM: + case DTRACEAGG_MIN: + case DTRACEAGG_MAX: + rval = dt_aggregate_countcmp(laddr, raddr); + break; + + default: + assert(0); + } + + return (rval); +} + +static int +dt_aggregate_valkeycmp(const void *lhs, const void *rhs) +{ + int rval; + + if ((rval = dt_aggregate_valcmp(lhs, rhs)) != 0) + return (rval); + + /* + * If we're here, the values for the two 
aggregation elements are + * equal. We already know that the key layout is the same for the two + * elements; we must now compare the keys themselves as a tie-breaker. + */ + return (dt_aggregate_keycmp(lhs, rhs)); +} + +static int +dt_aggregate_keyvarcmp(const void *lhs, const void *rhs) +{ + int rval; + + if ((rval = dt_aggregate_keycmp(lhs, rhs)) != 0) + return (rval); + + return (dt_aggregate_varcmp(lhs, rhs)); +} + +static int +dt_aggregate_varkeycmp(const void *lhs, const void *rhs) +{ + int rval; + + if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0) + return (rval); + + return (dt_aggregate_keycmp(lhs, rhs)); +} + +static int +dt_aggregate_valvarcmp(const void *lhs, const void *rhs) +{ + int rval; + + if ((rval = dt_aggregate_valkeycmp(lhs, rhs)) != 0) + return (rval); + + return (dt_aggregate_varcmp(lhs, rhs)); +} + +static int +dt_aggregate_varvalcmp(const void *lhs, const void *rhs) +{ + int rval; + + if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0) + return (rval); + + return (dt_aggregate_valkeycmp(lhs, rhs)); +} + +static int +dt_aggregate_keyvarrevcmp(const void *lhs, const void *rhs) +{ + return (dt_aggregate_keyvarcmp(rhs, lhs)); +} + +static int +dt_aggregate_varkeyrevcmp(const void *lhs, const void *rhs) +{ + return (dt_aggregate_varkeycmp(rhs, lhs)); +} + +static int +dt_aggregate_valvarrevcmp(const void *lhs, const void *rhs) +{ + return (dt_aggregate_valvarcmp(rhs, lhs)); +} + +static int +dt_aggregate_varvalrevcmp(const void *lhs, const void *rhs) +{ + return (dt_aggregate_varvalcmp(rhs, lhs)); +} + +static int +dt_aggregate_bundlecmp(const void *lhs, const void *rhs) +{ + dt_ahashent_t **lh = *((dt_ahashent_t ***)lhs); + dt_ahashent_t **rh = *((dt_ahashent_t ***)rhs); + int i, rval; + + if (dt_keysort) { + /* + * If we're sorting on keys, we need to scan until we find the + * last entry -- that's the representative key. (The order of + * the bundle is values followed by key to accommodate the + * default behavior of sorting by value.) 
If the keys are + * equal, we'll fall into the value comparison loop, below. + */ + for (i = 0; lh[i + 1] != NULL; i++) + continue; + + assert(i != 0); + assert(rh[i + 1] == NULL); + + if ((rval = dt_aggregate_keycmp(&lh[i], &rh[i])) != 0) + return (rval); + } + + for (i = 0; ; i++) { + if (lh[i + 1] == NULL) { + /* + * All of the values are equal; if we're sorting on + * keys, then we're only here because the keys were + * found to be equal and these records are therefore + * equal. If we're not sorting on keys, we'll use the + * key comparison from the representative key as the + * tie-breaker. + */ + if (dt_keysort) + return (0); + + assert(i != 0); + assert(rh[i + 1] == NULL); + return (dt_aggregate_keycmp(&lh[i], &rh[i])); + } else { + if ((rval = dt_aggregate_valcmp(&lh[i], &rh[i])) != 0) + return (rval); + } + } +} + +int +dt_aggregate_go(dtrace_hdl_t *dtp) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + dtrace_optval_t size, cpu; + dtrace_bufdesc_t *buf = &agp->dtat_buf; + int rval, i; + + assert(agp->dtat_maxcpu == 0); + assert(agp->dtat_ncpu == 0); + assert(agp->dtat_cpus == NULL); + + agp->dtat_maxcpu = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; + agp->dtat_ncpu = dt_sysconf(dtp, _SC_NPROCESSORS_MAX); + agp->dtat_cpus = malloc(agp->dtat_ncpu * sizeof (processorid_t)); + + if (agp->dtat_cpus == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + /* + * Use the aggregation buffer size as reloaded from the kernel. + */ + size = dtp->dt_options[DTRACEOPT_AGGSIZE]; + + rval = dtrace_getopt(dtp, "aggsize", &size); + assert(rval == 0); + + if (size == 0 || size == DTRACEOPT_UNSET) + return (0); + + buf = &agp->dtat_buf; + buf->dtbd_size = size; + + if ((buf->dtbd_data = malloc(buf->dtbd_size)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + /* + * Now query for the CPUs enabled. 
+ */ + rval = dtrace_getopt(dtp, "cpu", &cpu); + assert(rval == 0 && cpu != DTRACEOPT_UNSET); + + if (cpu != DTRACE_CPUALL) { + assert(cpu < agp->dtat_ncpu); + agp->dtat_cpus[agp->dtat_ncpus++] = (processorid_t)cpu; + + return (0); + } + + agp->dtat_ncpus = 0; + for (i = 0; i < agp->dtat_maxcpu; i++) { + if (dt_status(dtp, i) == -1) + continue; + + agp->dtat_cpus[agp->dtat_ncpus++] = i; + } + + return (0); +} + +static int +dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + dtrace_aggdata_t *data; + dtrace_aggdesc_t *aggdesc; + dtrace_recdesc_t *rec; + int i; + + switch (rval) { + case DTRACE_AGGWALK_NEXT: + break; + + case DTRACE_AGGWALK_CLEAR: { + uint32_t size, offs = 0; + + aggdesc = h->dtahe_data.dtada_desc; + rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; + size = rec->dtrd_size; + data = &h->dtahe_data; + + if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) { + offs = sizeof (uint64_t); + size -= sizeof (uint64_t); + } + + bzero(&data->dtada_data[rec->dtrd_offset] + offs, size); + + if (data->dtada_percpu == NULL) + break; + + for (i = 0; i < dtp->dt_aggregate.dtat_maxcpu; i++) + bzero(data->dtada_percpu[i] + offs, size); + break; + } + + case DTRACE_AGGWALK_ERROR: + /* + * We assume that errno is already set in this case. + */ + return (dt_set_errno(dtp, errno)); + + case DTRACE_AGGWALK_ABORT: + return (dt_set_errno(dtp, EDT_DIRABORT)); + + case DTRACE_AGGWALK_DENORMALIZE: + h->dtahe_data.dtada_normal = 1; + return (0); + + case DTRACE_AGGWALK_NORMALIZE: + if (h->dtahe_data.dtada_normal == 0) { + h->dtahe_data.dtada_normal = 1; + return (dt_set_errno(dtp, EDT_BADRVAL)); + } + + return (0); + + case DTRACE_AGGWALK_REMOVE: { + dtrace_aggdata_t *aggdata = &h->dtahe_data; + int max_cpus = agp->dtat_maxcpu; + + /* + * First, remove this hash entry from its hash chain. 
+ */ + if (h->dtahe_prev != NULL) { + h->dtahe_prev->dtahe_next = h->dtahe_next; + } else { + dt_ahash_t *hash = &agp->dtat_hash; + size_t ndx = h->dtahe_hashval % hash->dtah_size; + + assert(hash->dtah_hash[ndx] == h); + hash->dtah_hash[ndx] = h->dtahe_next; + } + + if (h->dtahe_next != NULL) + h->dtahe_next->dtahe_prev = h->dtahe_prev; + + /* + * Now remove it from the list of all hash entries. + */ + if (h->dtahe_prevall != NULL) { + h->dtahe_prevall->dtahe_nextall = h->dtahe_nextall; + } else { + dt_ahash_t *hash = &agp->dtat_hash; + + assert(hash->dtah_all == h); + hash->dtah_all = h->dtahe_nextall; + } + + if (h->dtahe_nextall != NULL) + h->dtahe_nextall->dtahe_prevall = h->dtahe_prevall; + + /* + * We're unlinked. We can safely destroy the data. + */ + if (aggdata->dtada_percpu != NULL) { + for (i = 0; i < max_cpus; i++) + free(aggdata->dtada_percpu[i]); + free(aggdata->dtada_percpu); + } + + free(aggdata->dtada_data); + free(h); + + return (0); + } + + default: + return (dt_set_errno(dtp, EDT_BADRVAL)); + } + + return (0); +} + +void +dt_aggregate_qsort(dtrace_hdl_t *dtp, void *base, size_t nel, size_t width, + int (*compar)(const void *, const void *)) +{ + int rev = dt_revsort, key = dt_keysort, keypos = dt_keypos; + dtrace_optval_t keyposopt = dtp->dt_options[DTRACEOPT_AGGSORTKEYPOS]; + + dt_revsort = (dtp->dt_options[DTRACEOPT_AGGSORTREV] != DTRACEOPT_UNSET); + dt_keysort = (dtp->dt_options[DTRACEOPT_AGGSORTKEY] != DTRACEOPT_UNSET); + + if (keyposopt != DTRACEOPT_UNSET && keyposopt <= INT_MAX) { + dt_keypos = (int)keyposopt; + } else { + dt_keypos = 0; + } + + if (compar == NULL) { + if (!dt_keysort) { + compar = dt_aggregate_varvalcmp; + } else { + compar = dt_aggregate_varkeycmp; + } + } + + qsort(base, nel, width, compar); + + dt_revsort = rev; + dt_keysort = key; + dt_keypos = keypos; +} + +int +dtrace_aggregate_walk(dtrace_hdl_t *dtp, dtrace_aggregate_f *func, void *arg) +{ + dt_ahashent_t *h, *next; + dt_ahash_t *hash = 
&dtp->dt_aggregate.dtat_hash; + + for (h = hash->dtah_all; h != NULL; h = next) { + /* + * dt_aggwalk_rval() can potentially remove the current hash + * entry; we need to load the next hash entry before calling + * into it. + */ + next = h->dtahe_nextall; + + if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1) + return (-1); + } + + return (0); +} + +static int +dt_aggregate_total(dtrace_hdl_t *dtp, boolean_t clear) +{ + dt_ahashent_t *h; + dtrace_aggdata_t **total; + dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id; + dt_aggregate_t *agp = &dtp->dt_aggregate; + dt_ahash_t *hash = &agp->dtat_hash; + uint32_t tflags; + + tflags = DTRACE_A_TOTAL | DTRACE_A_HASNEGATIVES | DTRACE_A_HASPOSITIVES; + + /* + * If we need to deliver per-aggregation totals, we're going to take + * three passes over the aggregate: one to clear everything out and + * determine our maximum aggregation ID, one to actually total + * everything up, and a final pass to assign the totals to the + * individual elements. + */ + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggdata_t *aggdata = &h->dtahe_data; + + if ((id = dt_aggregate_aggvarid(h)) > max) + max = id; + + aggdata->dtada_total = 0; + aggdata->dtada_flags &= ~tflags; + } + + if (clear || max == DTRACE_AGGVARIDNONE) + return (0); + + total = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *)); + + if (total == NULL) + return (-1); + + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggdata_t *aggdata = &h->dtahe_data; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_recdesc_t *rec; + caddr_t data; + int64_t val, *addr; + + rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; + data = aggdata->dtada_data; + addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset); + + switch (rec->dtrd_action) { + case DTRACEAGG_STDDEV: + val = dt_stddev((uint64_t *)addr, 1); + break; + + case DTRACEAGG_SUM: + case DTRACEAGG_COUNT: + val = *addr; + break; + + case DTRACEAGG_AVG: + val = addr[0] ? 
(addr[1] / addr[0]) : 0; + break; + + default: + continue; + } + + if (total[agg->dtagd_varid] == NULL) { + total[agg->dtagd_varid] = aggdata; + aggdata->dtada_flags |= DTRACE_A_TOTAL; + } else { + aggdata = total[agg->dtagd_varid]; + } + + if (val > 0) + aggdata->dtada_flags |= DTRACE_A_HASPOSITIVES; + + if (val < 0) { + aggdata->dtada_flags |= DTRACE_A_HASNEGATIVES; + val = -val; + } + + if (dtp->dt_options[DTRACEOPT_AGGZOOM] != DTRACEOPT_UNSET) { + val = (int64_t)((long double)val * + (1 / DTRACE_AGGZOOM_MAX)); + + if (val > aggdata->dtada_total) + aggdata->dtada_total = val; + } else { + aggdata->dtada_total += val; + } + } + + /* + * And now one final pass to set everyone's total. + */ + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggdata_t *aggdata = &h->dtahe_data, *t; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + + if ((t = total[agg->dtagd_varid]) == NULL || aggdata == t) + continue; + + aggdata->dtada_total = t->dtada_total; + aggdata->dtada_flags |= (t->dtada_flags & tflags); + } + + dt_free(dtp, total); + + return (0); +} + +static int +dt_aggregate_minmaxbin(dtrace_hdl_t *dtp, boolean_t clear) +{ + dt_ahashent_t *h; + dtrace_aggdata_t **minmax; + dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id; + dt_aggregate_t *agp = &dtp->dt_aggregate; + dt_ahash_t *hash = &agp->dtat_hash; + + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggdata_t *aggdata = &h->dtahe_data; + + if ((id = dt_aggregate_aggvarid(h)) > max) + max = id; + + aggdata->dtada_minbin = 0; + aggdata->dtada_maxbin = 0; + aggdata->dtada_flags &= ~DTRACE_A_MINMAXBIN; + } + + if (clear || max == DTRACE_AGGVARIDNONE) + return (0); + + minmax = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *)); + + if (minmax == NULL) + return (-1); + + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggdata_t *aggdata = &h->dtahe_data; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_recdesc_t *rec; + caddr_t data; + int64_t *addr; + 
int minbin = -1, maxbin = -1, i; + int start = 0, size; + + rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; + size = rec->dtrd_size / sizeof (int64_t); + data = aggdata->dtada_data; + addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset); + + switch (rec->dtrd_action) { + case DTRACEAGG_LQUANTIZE: + /* + * For lquantize(), we always display the entire range + * of the aggregation when aggpack is set. + */ + start = 1; + minbin = start; + maxbin = size - 1 - start; + break; + + case DTRACEAGG_QUANTIZE: + for (i = start; i < size; i++) { + if (!addr[i]) + continue; + + if (minbin == -1) + minbin = i - start; + + maxbin = i - start; + } + + if (minbin == -1) { + /* + * If we have no data (e.g., due to a clear() + * or negative increments), we'll use the + * zero bucket as both our min and max. + */ + minbin = maxbin = DTRACE_QUANTIZE_ZEROBUCKET; + } + + break; + + default: + continue; + } + + if (minmax[agg->dtagd_varid] == NULL) { + minmax[agg->dtagd_varid] = aggdata; + aggdata->dtada_flags |= DTRACE_A_MINMAXBIN; + aggdata->dtada_minbin = minbin; + aggdata->dtada_maxbin = maxbin; + continue; + } + + if (minbin < minmax[agg->dtagd_varid]->dtada_minbin) + minmax[agg->dtagd_varid]->dtada_minbin = minbin; + + if (maxbin > minmax[agg->dtagd_varid]->dtada_maxbin) + minmax[agg->dtagd_varid]->dtada_maxbin = maxbin; + } + + /* + * And now one final pass to set everyone's minbin and maxbin. 
+ */ + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggdata_t *aggdata = &h->dtahe_data, *mm; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + + if ((mm = minmax[agg->dtagd_varid]) == NULL || aggdata == mm) + continue; + + aggdata->dtada_minbin = mm->dtada_minbin; + aggdata->dtada_maxbin = mm->dtada_maxbin; + aggdata->dtada_flags |= DTRACE_A_MINMAXBIN; + } + + dt_free(dtp, minmax); + + return (0); +} + +static int +dt_aggregate_walk_sorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg, + int (*sfunc)(const void *, const void *)) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + dt_ahashent_t *h, **sorted; + dt_ahash_t *hash = &agp->dtat_hash; + size_t i, nentries = 0; + int rval = -1; + + agp->dtat_flags &= ~(DTRACE_A_TOTAL | DTRACE_A_MINMAXBIN); + + if (dtp->dt_options[DTRACEOPT_AGGHIST] != DTRACEOPT_UNSET) { + agp->dtat_flags |= DTRACE_A_TOTAL; + + if (dt_aggregate_total(dtp, B_FALSE) != 0) + return (-1); + } + + if (dtp->dt_options[DTRACEOPT_AGGPACK] != DTRACEOPT_UNSET) { + agp->dtat_flags |= DTRACE_A_MINMAXBIN; + + if (dt_aggregate_minmaxbin(dtp, B_FALSE) != 0) + return (-1); + } + + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) + nentries++; + + sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *)); + + if (sorted == NULL) + goto out; + + for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) + sorted[i++] = h; + + (void) pthread_mutex_lock(&dt_qsort_lock); + + if (sfunc == NULL) { + dt_aggregate_qsort(dtp, sorted, nentries, + sizeof (dt_ahashent_t *), NULL); + } else { + /* + * If we've been explicitly passed a sorting function, + * we'll use that -- ignoring the values of the "aggsortrev", + * "aggsortkey" and "aggsortkeypos" options. 
+ */ + qsort(sorted, nentries, sizeof (dt_ahashent_t *), sfunc); + } + + (void) pthread_mutex_unlock(&dt_qsort_lock); + + for (i = 0; i < nentries; i++) { + h = sorted[i]; + + if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1) + goto out; + } + + rval = 0; +out: /* reached on success and on failure; rval stays -1 unless it was set to 0 just above */ + if (agp->dtat_flags & DTRACE_A_TOTAL) + (void) dt_aggregate_total(dtp, B_TRUE); + + if (agp->dtat_flags & DTRACE_A_MINMAXBIN) + (void) dt_aggregate_minmaxbin(dtp, B_TRUE); /* clear == B_TRUE: only resets each entry's bins and flag */ + + dt_free(dtp, sorted); + return (rval); +} +/* + * Sorted walk with no explicit comparator: passing NULL makes + * dt_aggregate_walk_sorted() sort through dt_aggregate_qsort() rather than + * calling qsort() with a caller-chosen function. + */ +int +dtrace_aggregate_walk_sorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, arg, NULL)); +} +/* + * Sorted walk ordered by dt_aggregate_varkeycmp. As noted in + * dt_aggregate_walk_sorted(), supplying a comparator bypasses the + * "aggsortrev", "aggsortkey" and "aggsortkeypos" options. + */ +int +dtrace_aggregate_walk_keysorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_varkeycmp)); +} +/* Sorted walk ordered by dt_aggregate_varvalcmp. */ +int +dtrace_aggregate_walk_valsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_varvalcmp)); +} +/* Sorted walk ordered by dt_aggregate_keyvarcmp. */ +int +dtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_keyvarcmp)); +} +/* Sorted walk ordered by dt_aggregate_valvarcmp. */ +int +dtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_valvarcmp)); +} +/* Sorted walk ordered by dt_aggregate_varkeyrevcmp. */ +int +dtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_varkeyrevcmp)); +} +/* Sorted walk ordered by dt_aggregate_varvalrevcmp. */ +int +dtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_varvalrevcmp)); +} +/* Sorted walk ordered by dt_aggregate_keyvarrevcmp. */ +int +dtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_keyvarrevcmp)); +} + +int 
+dtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *dtp, + dtrace_aggregate_f *func, void *arg) +{ + return (dt_aggregate_walk_sorted(dtp, func, + arg, dt_aggregate_valvarrevcmp)); +} + +int +dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars, + int naggvars, dtrace_aggregate_walk_joined_f *func, void *arg) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + dt_ahashent_t *h, **sorted = NULL, ***bundle, **nbundle; + const dtrace_aggdata_t **data; + dt_ahashent_t *zaggdata = NULL; + dt_ahash_t *hash = &agp->dtat_hash; + size_t nentries = 0, nbundles = 0, start, zsize = 0, bundlesize; + dtrace_aggvarid_t max = 0, aggvar; + int rval = -1, *map, *remap = NULL; + int i, j; + dtrace_optval_t sortpos = dtp->dt_options[DTRACEOPT_AGGSORTPOS]; + + /* + * If the sorting position is greater than the number of aggregation + * variable IDs, we silently set it to 0. + */ + if (sortpos == DTRACEOPT_UNSET || sortpos >= naggvars) + sortpos = 0; + + /* + * First we need to translate the specified aggregation variable IDs + * into a linear map that will allow us to translate an aggregation + * variable ID into its position in the specified aggvars. + */ + for (i = 0; i < naggvars; i++) { + if (aggvars[i] == DTRACE_AGGVARIDNONE || aggvars[i] < 0) + return (dt_set_errno(dtp, EDT_BADAGGVAR)); + + if (aggvars[i] > max) + max = aggvars[i]; + } + + if ((map = dt_zalloc(dtp, (max + 1) * sizeof (int))) == NULL) + return (-1); + + zaggdata = dt_zalloc(dtp, naggvars * sizeof (dt_ahashent_t)); + + if (zaggdata == NULL) + goto out; + + for (i = 0; i < naggvars; i++) { + int ndx = i + sortpos; + + if (ndx >= naggvars) + ndx -= naggvars; + + aggvar = aggvars[ndx]; + assert(aggvar <= max); + + if (map[aggvar]) { + /* + * We have an aggregation variable that is present + * more than once in the array of aggregation + * variables. While it's unclear why one might want + * to do this, it's legal. 
To support this construct, + * we will allocate a remap that will indicate the + * position from which this aggregation variable + * should be pulled. (That is, where the remap will + * map from one position to another.) + */ + if (remap == NULL) { + remap = dt_zalloc(dtp, naggvars * sizeof (int)); + + if (remap == NULL) + goto out; + } + + /* + * Given that the variable is already present, assert + * that following through the mapping and adjusting + * for the sort position yields the same aggregation + * variable ID. + */ + assert(aggvars[(map[aggvar] - 1 + sortpos) % + naggvars] == aggvars[ndx]); + + remap[i] = map[aggvar]; + continue; + } + + map[aggvar] = i + 1; + } + + /* + * We need to take two passes over the data to size our allocation, so + * we'll use the first pass to also fill in the zero-filled data to be + * used to properly format a zero-valued aggregation. + */ + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + dtrace_aggvarid_t id; + int ndx; + + if ((id = dt_aggregate_aggvarid(h)) > max || !(ndx = map[id])) + continue; + + if (zaggdata[ndx - 1].dtahe_size == 0) { + zaggdata[ndx - 1].dtahe_size = h->dtahe_size; + zaggdata[ndx - 1].dtahe_data = h->dtahe_data; + } + + nentries++; + } + + if (nentries == 0) { + /* + * We couldn't find any entries; there is nothing else to do. + */ + rval = 0; + goto out; + } + + /* + * Before we sort the data, we're going to look for any holes in our + * zero-filled data. This will occur if an aggregation variable that + * we are being asked to print has not yet been assigned the result of + * any aggregating action for _any_ tuple. The issue becomes that we + * would like a zero value to be printed for all columns for this + * aggregation, but without any record description, we don't know the + * aggregating action that corresponds to the aggregation variable. 
To + * try to find a match, we're simply going to lookup aggregation IDs + * (which are guaranteed to be contiguous and to start from 1), looking + * for the specified aggregation variable ID. If we find a match, + * we'll use that. If we iterate over all aggregation IDs and don't + * find a match, then we must be an anonymous enabling. (Anonymous + * enablings can't currently derive either aggregation variable IDs or + * aggregation variable names given only an aggregation ID.) In this + * obscure case (anonymous enabling, multiple aggregation printa() with + * some aggregations not represented for any tuple), our defined + * behavior is that the zero will be printed in the format of the first + * aggregation variable that contains any non-zero value. + */ + for (i = 0; i < naggvars; i++) { + if (zaggdata[i].dtahe_size == 0) { + dtrace_aggvarid_t aggvar; + + aggvar = aggvars[(i - sortpos + naggvars) % naggvars]; + assert(zaggdata[i].dtahe_data.dtada_data == NULL); + + for (j = DTRACE_AGGIDNONE + 1; ; j++) { + dtrace_aggdesc_t *agg; + dtrace_aggdata_t *aggdata; + + if (dt_aggid_lookup(dtp, j, &agg) != 0) + break; + + if (agg->dtagd_varid != aggvar) + continue; + + /* + * We have our description -- now we need to + * cons up the zaggdata entry for it. + */ + aggdata = &zaggdata[i].dtahe_data; + aggdata->dtada_size = agg->dtagd_size; + aggdata->dtada_desc = agg; + aggdata->dtada_handle = dtp; + (void) dt_epid_lookup(dtp, agg->dtagd_epid, + &aggdata->dtada_edesc, + &aggdata->dtada_pdesc); + aggdata->dtada_normal = 1; + zaggdata[i].dtahe_hashval = 0; + zaggdata[i].dtahe_size = agg->dtagd_size; + break; + } + + if (zaggdata[i].dtahe_size == 0) { + caddr_t data; + + /* + * We couldn't find this aggregation, meaning + * that we have never seen it before for any + * tuple _and_ this is an anonymous enabling. + * That is, we're in the obscure case outlined + * above. 
In this case, our defined behavior + * is to format the data in the format of the + * first non-zero aggregation -- of which, of + * course, we know there to be at least one + * (or nentries would have been zero). + */ + for (j = 0; j < naggvars; j++) { + if (zaggdata[j].dtahe_size != 0) + break; + } + + assert(j < naggvars); + zaggdata[i] = zaggdata[j]; + + data = zaggdata[i].dtahe_data.dtada_data; + assert(data != NULL); + } + } + } + + /* + * Now we need to allocate our zero-filled data for use for + * aggregations that don't have a value corresponding to a given key. + */ + for (i = 0; i < naggvars; i++) { + dtrace_aggdata_t *aggdata = &zaggdata[i].dtahe_data; + dtrace_aggdesc_t *aggdesc = aggdata->dtada_desc; + dtrace_recdesc_t *rec; + uint64_t larg; + caddr_t zdata; + + zsize = zaggdata[i].dtahe_size; + assert(zsize != 0); + + if ((zdata = dt_zalloc(dtp, zsize)) == NULL) { + /* + * If we failed to allocate some zero-filled data, we + * need to zero out the remaining dtada_data pointers + * to prevent the wrong data from being freed below. + */ + for (j = i; j < naggvars; j++) + zaggdata[j].dtahe_data.dtada_data = NULL; + goto out; + } + + aggvar = aggvars[(i - sortpos + naggvars) % naggvars]; + + /* + * First, the easy bit. To maintain compatibility with + * consumers that pull the compiler-generated ID out of the + * data, we put that ID at the top of the zero-filled data. + */ + rec = &aggdesc->dtagd_rec[0]; + /* LINTED - alignment */ + *((dtrace_aggvarid_t *)(zdata + rec->dtrd_offset)) = aggvar; + + rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; + + /* + * Now for the more complicated part. If (and only if) this + * is an lquantize() aggregating action, zero-filled data is + * not equivalent to an empty record: we must also get the + * parameters for the lquantize(). 
+ */ + if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) { + if (aggdata->dtada_data != NULL) { + /* + * The easier case here is if we actually have + * some prototype data -- in which case we + * manually dig it out of the aggregation + * record. + */ + /* LINTED - alignment */ + larg = *((uint64_t *)(aggdata->dtada_data + + rec->dtrd_offset)); + } else { + /* + * We don't have any prototype data. As a + * result, we know that we _do_ have the + * compiler-generated information. (If this + * were an anonymous enabling, all of our + * zero-filled data would have prototype data + * -- either directly or indirectly.) So as + * gross as it is, we'll grovel around in the + * compiler-generated information to find the + * lquantize() parameters. + */ + dtrace_stmtdesc_t *sdp; + dt_ident_t *aid; + dt_idsig_t *isp; + + sdp = (dtrace_stmtdesc_t *)(uintptr_t) + aggdesc->dtagd_rec[0].dtrd_uarg; + aid = sdp->dtsd_aggdata; + isp = (dt_idsig_t *)aid->di_data; + assert(isp->dis_auxinfo != 0); + larg = isp->dis_auxinfo; + } + + /* LINTED - alignment */ + *((uint64_t *)(zdata + rec->dtrd_offset)) = larg; + } + + aggdata->dtada_data = zdata; + } + + /* + * Now that we've dealt with setting up our zero-filled data, we can + * allocate our sorted array, and take another pass over the data to + * fill it. + */ + sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *)); + + if (sorted == NULL) + goto out; + + for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) { + dtrace_aggvarid_t id; + + if ((id = dt_aggregate_aggvarid(h)) > max || !map[id]) + continue; + + sorted[i++] = h; + } + + assert(i == nentries); + + /* + * We've loaded our array; now we need to sort by value to allow us + * to create bundles of like value. We're going to acquire the + * dt_qsort_lock here, and hold it across all of our subsequent + * comparison and sorting. 
+ */ + (void) pthread_mutex_lock(&dt_qsort_lock); + + qsort(sorted, nentries, sizeof (dt_ahashent_t *), + dt_aggregate_keyvarcmp); + + /* + * Now we need to go through and create bundles. Because the number + * of bundles is bounded by the size of the sorted array, we're going + * to reuse the underlying storage. And note that "bundle" is an + * array of pointers to arrays of pointers to dt_ahashent_t -- making + * its type (regrettably) "dt_ahashent_t ***". (Regrettable because + * '*' -- like '_' and 'X' -- should never appear in triplicate in + * an ideal world.) + */ + bundle = (dt_ahashent_t ***)sorted; + + for (i = 1, start = 0; i <= nentries; i++) { + if (i < nentries && + dt_aggregate_keycmp(&sorted[i], &sorted[i - 1]) == 0) + continue; + + /* + * We have a bundle boundary. Everything from start to + * (i - 1) belongs in one bundle. + */ + assert(i - start <= naggvars); + bundlesize = (naggvars + 2) * sizeof (dt_ahashent_t *); + + if ((nbundle = dt_zalloc(dtp, bundlesize)) == NULL) { + (void) pthread_mutex_unlock(&dt_qsort_lock); + goto out; + } + + for (j = start; j < i; j++) { + dtrace_aggvarid_t id = dt_aggregate_aggvarid(sorted[j]); + + assert(id <= max); + assert(map[id] != 0); + assert(map[id] - 1 < naggvars); + assert(nbundle[map[id] - 1] == NULL); + nbundle[map[id] - 1] = sorted[j]; + + if (nbundle[naggvars] == NULL) + nbundle[naggvars] = sorted[j]; + } + + for (j = 0; j < naggvars; j++) { + if (nbundle[j] != NULL) + continue; + + /* + * Before we assume that this aggregation variable + * isn't present (and fall back to using the + * zero-filled data allocated earlier), check the + * remap. If we have a remapping, we'll drop it in + * here. Note that we might be remapping an + * aggregation variable that isn't present for this + * key; in this case, the aggregation data that we + * copy will point to the zeroed data. 
+ */ + if (remap != NULL && remap[j]) { + assert(remap[j] - 1 < j); + assert(nbundle[remap[j] - 1] != NULL); + nbundle[j] = nbundle[remap[j] - 1]; + } else { + nbundle[j] = &zaggdata[j]; + } + } + + bundle[nbundles++] = nbundle; + start = i; + } + + /* + * Now we need to re-sort based on the first value. + */ + dt_aggregate_qsort(dtp, bundle, nbundles, sizeof (dt_ahashent_t **), + dt_aggregate_bundlecmp); + + (void) pthread_mutex_unlock(&dt_qsort_lock); + + /* + * We're done! Now we just need to go back over the sorted bundles, + * calling the function. + */ + data = alloca((naggvars + 1) * sizeof (dtrace_aggdata_t *)); + + for (i = 0; i < nbundles; i++) { + for (j = 0; j < naggvars; j++) + data[j + 1] = NULL; + + for (j = 0; j < naggvars; j++) { + int ndx = j - sortpos; + + if (ndx < 0) + ndx += naggvars; + + assert(bundle[i][ndx] != NULL); + data[j + 1] = &bundle[i][ndx]->dtahe_data; + } + + for (j = 0; j < naggvars; j++) + assert(data[j + 1] != NULL); + + /* + * The representative key is the last element in the bundle. + * Assert that we have one, and then set it to be the first + * element of data. 
+ */ + assert(bundle[i][j] != NULL); + data[0] = &bundle[i][j]->dtahe_data; + + if ((rval = func(data, naggvars + 1, arg)) == -1) + goto out; + } + + rval = 0; +out: + for (i = 0; i < nbundles; i++) + dt_free(dtp, bundle[i]); + + if (zaggdata != NULL) { + for (i = 0; i < naggvars; i++) + dt_free(dtp, zaggdata[i].dtahe_data.dtada_data); + } + + dt_free(dtp, zaggdata); + dt_free(dtp, sorted); + dt_free(dtp, remap); + dt_free(dtp, map); + + return (rval); +} +/* + * Print every aggregation to fp. The walk is driven by func, or by + * dtrace_aggregate_walk_sorted() when func is NULL, with dt_print_agg() as + * the per-entry callback and a zeroed dt_print_aggdata_t (dtpa_allunprint + * set to 1) as its argument. Returns 0 on success; if the walk returns -1, + * returns dt_set_errno() applied to the handle's current dt_errno. + */ +int +dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp, + dtrace_aggregate_walk_f *func) +{ + dt_print_aggdata_t pd; + + bzero(&pd, sizeof (pd)); + + pd.dtpa_dtp = dtp; + pd.dtpa_fp = fp; + pd.dtpa_allunprint = 1; + + if (func == NULL) + func = dtrace_aggregate_walk_sorted; + + if ((*func)(dtp, dt_print_agg, &pd) == -1) + return (dt_set_errno(dtp, dtp->dt_errno)); + + return (0); +} +/* + * Zero the data of every aggregation on the hash's all-entries list. Only + * the last record of each description (dtagd_rec[dtagd_nrecs - 1]) is + * cleared, so bytes belonging to earlier records are left intact; when an + * entry has per-CPU buffers, the first dtrd_size bytes of each of the + * dtat_maxcpu buffers are cleared as well. + */ +void +dtrace_aggregate_clear(dtrace_hdl_t *dtp) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + dt_ahash_t *hash = &agp->dtat_hash; + dt_ahashent_t *h; + dtrace_aggdata_t *data; + dtrace_aggdesc_t *aggdesc; + dtrace_recdesc_t *rec; + int i, max_cpus = agp->dtat_maxcpu; + + for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { + aggdesc = h->dtahe_data.dtada_desc; + rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; + data = &h->dtahe_data; + + bzero(&data->dtada_data[rec->dtrd_offset], rec->dtrd_size); + + if (data->dtada_percpu == NULL) + continue; + + for (i = 0; i < max_cpus; i++) + bzero(data->dtada_percpu[i], rec->dtrd_size); + } +} +/* + * Free everything hanging off the handle's aggregate state: the hash table + * (dtah_hash), every entry on the all-entries list together with its data + * and per-CPU buffers, and finally the aggregation buffer and CPU list. + */ +void +dt_aggregate_destroy(dtrace_hdl_t *dtp) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + dt_ahash_t *hash = &agp->dtat_hash; + dt_ahashent_t *h, *next; + dtrace_aggdata_t *aggdata; + int i, max_cpus = agp->dtat_maxcpu; + + if (hash->dtah_hash == NULL) { + assert(hash->dtah_all == NULL); + } else { + free(hash->dtah_hash); + + for (h = hash->dtah_all; h != NULL; h = next) { + next = h->dtahe_nextall; /* saved before h itself is freed */ + + aggdata = &h->dtahe_data; + + if (aggdata->dtada_percpu != NULL) { + for (i 
= 0; i < max_cpus; i++) + free(aggdata->dtada_percpu[i]); + free(aggdata->dtada_percpu); + } + + free(aggdata->dtada_data); + free(h); + } + + hash->dtah_hash = NULL; + hash->dtah_all = NULL; + hash->dtah_size = 0; + } + + free(agp->dtat_buf.dtbd_data); + free(agp->dtat_cpus); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_as.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_as.c new file mode 100644 index 0000000..f937261 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_as.c @@ -0,0 +1,503 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. 
+ */ + +#include <sys/types.h> +#include <strings.h> +#include <stdlib.h> +#include <assert.h> + +#include <dt_impl.h> +#include <dt_parser.h> +#include <dt_as.h> +/* + * Initialize an IR list: zero the whole structure and start label numbering + * at 1, because label 0 is DT_LBL_NONE ("no label") in dt_as.h. + */ +void +dt_irlist_create(dt_irlist_t *dlp) +{ + bzero(dlp, sizeof (dt_irlist_t)); + dlp->dl_label = 1; +} +/* + * Free every node on the list. The dt_irlist_t itself is neither freed nor + * reset here, so its dl_list/dl_last pointers are stale afterwards. + */ +void +dt_irlist_destroy(dt_irlist_t *dlp) +{ + dt_irnode_t *dip, *nip; + + for (dip = dlp->dl_list; dip != NULL; dip = nip) { + nip = dip->di_next; /* fetch the successor before dip is freed */ + free(dip); + } +} +/* + * Append dip at the tail of the list. dip->di_next is not modified here. + * dl_len is incremented only for nodes that are not labeled NOPs; dt_as() + * treats a labeled NOP as a label declaration and emits no instruction + * for it. + */ +void +dt_irlist_append(dt_irlist_t *dlp, dt_irnode_t *dip) +{ + if (dlp->dl_last != NULL) + dlp->dl_last->di_next = dip; + else + dlp->dl_list = dip; + + dlp->dl_last = dip; + + if (dip->di_label == DT_LBL_NONE || dip->di_instr != DIF_INSTR_NOP) + dlp->dl_len++; /* don't count forward refs in instr count */ +} +/* Return the next unused label number and advance the list's label counter. */ +uint_t +dt_irlist_label(dt_irlist_t *dlp) +{ + return (dlp->dl_label++); +} +/* + * dt_idhash_iter() callback used by dt_as(): data points to a size_t that is + * incremented for each identifier marked as read (DT_IDFLG_DIFR) or written + * (DT_IDFLG_DIFW). Always returns 0. + */ +/*ARGSUSED*/ +static int +dt_countvar(dt_idhash_t *dhp, dt_ident_t *idp, void *data) +{ + size_t *np = data; + + if (idp->di_flags & (DT_IDFLG_DIFR | DT_IDFLG_DIFW)) + (*np)++; /* include variable in vartab */ + + return (0); +} +/* + * dt_idhash_iter() callback used by dt_as(): data is the dt_pcb_t. For each + * identifier marked read or written, fill in the next dtdo_vartab[] slot + * (indexed by pcb_asvidx) and insert the variable's name into the pcb's + * string table. Failures are reported by longjmp() to pcb_jmpbuf. + */ +/*ARGSUSED*/ +static int +dt_copyvar(dt_idhash_t *dhp, dt_ident_t *idp, void *data) +{ + dt_pcb_t *pcb = data; + dtrace_difv_t *dvp; + ssize_t stroff; + dt_node_t dn; + + if (!(idp->di_flags & (DT_IDFLG_DIFR | DT_IDFLG_DIFW))) + return (0); /* omit variable from vartab */ + + dvp = &pcb->pcb_difo->dtdo_vartab[pcb->pcb_asvidx++]; + stroff = dt_strtab_insert(pcb->pcb_strtab, idp->di_name); + + if (stroff == -1L) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + if (stroff > DIF_STROFF_MAX) + longjmp(pcb->pcb_jmpbuf, EDT_STR2BIG); + + dvp->dtdv_name = (uint_t)stroff; + dvp->dtdv_id = idp->di_id; + dvp->dtdv_flags = 0; + + dvp->dtdv_kind = (idp->di_kind == DT_IDENT_ARRAY) ? 
+ DIFV_KIND_ARRAY : DIFV_KIND_SCALAR; + + if (idp->di_flags & DT_IDFLG_LOCAL) + dvp->dtdv_scope = DIFV_SCOPE_LOCAL; + else if (idp->di_flags & DT_IDFLG_TLS) + dvp->dtdv_scope = DIFV_SCOPE_THREAD; + else + dvp->dtdv_scope = DIFV_SCOPE_GLOBAL; + + if (idp->di_flags & DT_IDFLG_DIFR) + dvp->dtdv_flags |= DIFV_F_REF; + if (idp->di_flags & DT_IDFLG_DIFW) + dvp->dtdv_flags |= DIFV_F_MOD; + + bzero(&dn, sizeof (dn)); + dt_node_type_assign(&dn, idp->di_ctfp, idp->di_type, B_FALSE); + dt_node_diftype(pcb->pcb_hdl, &dn, &dvp->dtdv_type); + + idp->di_flags &= ~(DT_IDFLG_DIFR | DT_IDFLG_DIFW); /* read/write marks are now recorded in dtdv_flags; reset them on the identifier */ + return (0); +} +/* + * String-table write callback (dt_as() passes it to dt_strtab_write()): + * copy the n bytes at s into the DIFO's string table at byte offset off, + * and return n as the number of bytes written. + */ +static ssize_t +dt_copystr(const char *s, size_t n, size_t off, dt_pcb_t *pcb) +{ + bcopy(s, pcb->pcb_difo->dtdo_strtab + off, n); + return (n); +} + +/* + * Rewrite the xlate/xlarg instruction at dtdo_buf[i] so that the instruction's + * xltab index reflects the offset 'xi' of the assigned dtdo_xlmtab[] location. + * We track the cumulative references to translators and members in the pcb's + * pcb_asxrefs[] array, a two-dimensional array of bitmaps indexed by the + * global translator id and then by the corresponding translator member id. 
+ */ +static void +dt_as_xlate(dt_pcb_t *pcb, dtrace_difo_t *dp, + uint_t i, uint_t xi, dt_node_t *dnp) +{ + dtrace_hdl_t *dtp = pcb->pcb_hdl; + dt_xlator_t *dxp = dnp->dn_membexpr->dn_xlator; + + assert(i < dp->dtdo_len); + assert(xi < dp->dtdo_xlmlen); + + assert(dnp->dn_kind == DT_NODE_MEMBER); + assert(dnp->dn_membexpr->dn_kind == DT_NODE_XLATOR); + + assert(dxp->dx_id < dtp->dt_xlatorid); + assert(dnp->dn_membid < dxp->dx_nmembers); + /* first use: allocate the zeroed per-translator pointer array, one slot per dt_xlatorid */ + if (pcb->pcb_asxrefs == NULL) { + pcb->pcb_asxreflen = dtp->dt_xlatorid; + pcb->pcb_asxrefs = + dt_zalloc(dtp, sizeof (ulong_t *) * pcb->pcb_asxreflen); + if (pcb->pcb_asxrefs == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + } + /* first reference to this translator: allocate its zeroed member bitmap */ + if (pcb->pcb_asxrefs[dxp->dx_id] == NULL) { + pcb->pcb_asxrefs[dxp->dx_id] = + dt_zalloc(dtp, BT_SIZEOFMAP(dxp->dx_nmembers)); + if (pcb->pcb_asxrefs[dxp->dx_id] == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + } + /* re-encode the instruction around xi, keeping its original opcode and RD field */ + dp->dtdo_buf[i] = DIF_INSTR_XLATE( + DIF_INSTR_OP(dp->dtdo_buf[i]), xi, DIF_INSTR_RD(dp->dtdo_buf[i])); + + BT_SET(pcb->pcb_asxrefs[dxp->dx_id], dnp->dn_membid); + dp->dtdo_xlmtab[xi] = dnp; +} +/* + * Report a relocation that remains against the symbol described by idp. + * The symbol is classified from di_flags as "user", "primary kernel" or + * "loadable kernel", yylineno is set to the identifier's line, and the + * error is raised through xyerror() with D_ASRELO; offset is printed in + * hex. NOTE(review): xyerror() presumably does not return -- confirm. + */ +static void +dt_as_undef(const dt_ident_t *idp, uint_t offset) +{ + const char *kind, *mark = (idp->di_flags & DT_IDFLG_USER) ? "``" : "`"; + const dtrace_syminfo_t *dts = idp->di_data; + + if (idp->di_flags & DT_IDFLG_USER) + kind = "user"; + else if (idp->di_flags & DT_IDFLG_PRIM) + kind = "primary kernel"; + else + kind = "loadable kernel"; + + yylineno = idp->di_lineno; + + xyerror(D_ASRELO, "relocation remains against %s symbol %s%s%s (offset " + "0x%x)\n", kind, dts->dts_object, mark, dts->dts_name, offset); +} +/* + * Assemble the IR list in pcb->pcb_ir into a newly allocated DIFO: the + * instruction buffer with branch labels resolved, the variable table, the + * kernel/user relocation and translator-member tables, the string and + * integer tables, and the return type taken from pcb_dret. Allocation and + * string-offset failures are reported by longjmp() to pcb->pcb_jmpbuf. + * On success pcb_difo and pcb_dret are reset to NULL and the DIFO is + * returned. + */ +dtrace_difo_t * +dt_as(dt_pcb_t *pcb) +{ + dtrace_hdl_t *dtp = pcb->pcb_hdl; + dt_irlist_t *dlp = &pcb->pcb_ir; + uint_t *labels = NULL; + dt_irnode_t *dip; + dtrace_difo_t *dp; + dt_ident_t *idp; + + size_t n = 0; + uint_t i; + + uint_t kmask, kbits, umask, ubits; + uint_t krel = 0, urel = 0, xlrefs = 0; + + /* + * Select bitmasks based upon the desired symbol linking policy. 
We + * test (di_extern->di_flags & xmask) == xbits to determine if the + * symbol should have a relocation entry generated in the loop below. + * + * DT_LINK_KERNEL = kernel symbols static, user symbols dynamic + * DT_LINK_PRIMARY = primary kernel symbols static, others dynamic + * DT_LINK_DYNAMIC = all symbols dynamic + * DT_LINK_STATIC = all symbols static + * + * By 'static' we mean that we use the symbol's value at compile-time + * in the final DIF. By 'dynamic' we mean that we create a relocation + * table entry for the symbol's value so it can be relocated later. + */ + switch (dtp->dt_linkmode) { + case DT_LINK_KERNEL: + kmask = 0; + kbits = -1u; + umask = DT_IDFLG_USER; + ubits = DT_IDFLG_USER; + break; + case DT_LINK_PRIMARY: + kmask = DT_IDFLG_USER | DT_IDFLG_PRIM; + kbits = 0; + umask = DT_IDFLG_USER; + ubits = DT_IDFLG_USER; + break; + case DT_LINK_DYNAMIC: + kmask = DT_IDFLG_USER; + kbits = 0; + umask = DT_IDFLG_USER; + ubits = DT_IDFLG_USER; + break; + case DT_LINK_STATIC: + kmask = umask = 0; + kbits = ubits = -1u; + break; + default: + xyerror(D_UNKNOWN, "internal error -- invalid link mode %u\n", + dtp->dt_linkmode); + } + + assert(pcb->pcb_difo == NULL); + pcb->pcb_difo = dt_zalloc(dtp, sizeof (dtrace_difo_t)); + + if ((dp = pcb->pcb_difo) == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + + dp->dtdo_buf = dt_alloc(dtp, sizeof (dif_instr_t) * dlp->dl_len); + + if (dp->dtdo_buf == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + + if ((labels = dt_alloc(dtp, sizeof (uint_t) * dlp->dl_label)) == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * Make an initial pass through the instruction list, filling in the + * instruction buffer with valid instructions and skipping labeled nops. + * While doing this, we also fill in our labels[] translation table + * and we count up the number of relocation table entries we will need. 
+ */ + for (i = 0, dip = dlp->dl_list; dip != NULL; dip = dip->di_next) { + if (dip->di_label != DT_LBL_NONE) + labels[dip->di_label] = i; + + if (dip->di_label == DT_LBL_NONE || + dip->di_instr != DIF_INSTR_NOP) + dp->dtdo_buf[i++] = dip->di_instr; + + if (dip->di_extern == NULL) + continue; /* no external references needed */ + + switch (DIF_INSTR_OP(dip->di_instr)) { + case DIF_OP_SETX: + idp = dip->di_extern; + if ((idp->di_flags & kmask) == kbits) + krel++; + else if ((idp->di_flags & umask) == ubits) + urel++; + break; + case DIF_OP_XLATE: + case DIF_OP_XLARG: + xlrefs++; + break; + default: + xyerror(D_UNKNOWN, "unexpected assembler relocation " + "for opcode 0x%x\n", DIF_INSTR_OP(dip->di_instr)); + } + } + + assert(i == dlp->dl_len); + dp->dtdo_len = dlp->dl_len; + + /* + * Make a second pass through the instructions, relocating each branch + * label to the index of the final instruction in the buffer and noting + * any other instruction-specific DIFO flags such as dtdo_destructive. + */ + for (i = 0; i < dp->dtdo_len; i++) { + dif_instr_t instr = dp->dtdo_buf[i]; + uint_t op = DIF_INSTR_OP(instr); + + if (op == DIF_OP_CALL) { + if (DIF_INSTR_SUBR(instr) == DIF_SUBR_COPYOUT || + DIF_INSTR_SUBR(instr) == DIF_SUBR_COPYOUTSTR) + dp->dtdo_destructive = 1; + continue; + } + + if (op >= DIF_OP_BA && op <= DIF_OP_BLEU) { + assert(DIF_INSTR_LABEL(instr) < dlp->dl_label); + dp->dtdo_buf[i] = DIF_INSTR_BRANCH(op, + labels[DIF_INSTR_LABEL(instr)]); + } + } + + dt_free(dtp, labels); + pcb->pcb_asvidx = 0; + + /* + * Allocate memory for the appropriate number of variable records and + * then fill in each variable record. As we populate the variable + * table we insert the corresponding variable names into the strtab. 
+ */ + (void) dt_idhash_iter(dtp->dt_tls, dt_countvar, &n); + (void) dt_idhash_iter(dtp->dt_globals, dt_countvar, &n); + (void) dt_idhash_iter(pcb->pcb_locals, dt_countvar, &n); + + if (n != 0) { + dp->dtdo_vartab = dt_alloc(dtp, n * sizeof (dtrace_difv_t)); + dp->dtdo_varlen = (uint32_t)n; + + if (dp->dtdo_vartab == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + + (void) dt_idhash_iter(dtp->dt_tls, dt_copyvar, pcb); + (void) dt_idhash_iter(dtp->dt_globals, dt_copyvar, pcb); + (void) dt_idhash_iter(pcb->pcb_locals, dt_copyvar, pcb); + } + + /* + * Allocate memory for the appropriate number of relocation table + * entries based upon our kernel and user counts from the first pass. + */ + if (krel != 0) { + dp->dtdo_kreltab = dt_alloc(dtp, + krel * sizeof (dof_relodesc_t)); + dp->dtdo_krelen = krel; + + if (dp->dtdo_kreltab == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + } + + if (urel != 0) { + dp->dtdo_ureltab = dt_alloc(dtp, + urel * sizeof (dof_relodesc_t)); + dp->dtdo_urelen = urel; + + if (dp->dtdo_ureltab == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + } + + if (xlrefs != 0) { + dp->dtdo_xlmtab = dt_zalloc(dtp, sizeof (dt_node_t *) * xlrefs); + dp->dtdo_xlmlen = xlrefs; + + if (dp->dtdo_xlmtab == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + } + + /* + * If any relocations are needed, make another pass through the + * instruction list and fill in the relocation table entries. 
+ */ + if (krel + urel + xlrefs != 0) { + uint_t knodef = pcb->pcb_cflags & DTRACE_C_KNODEF; + uint_t unodef = pcb->pcb_cflags & DTRACE_C_UNODEF; + + dof_relodesc_t *krp = dp->dtdo_kreltab; + dof_relodesc_t *urp = dp->dtdo_ureltab; + dt_node_t **xlp = dp->dtdo_xlmtab; + + i = 0; /* dtdo_buf[] index */ + + for (dip = dlp->dl_list; dip != NULL; dip = dip->di_next) { + dof_relodesc_t *rp; + ssize_t soff; + uint_t nodef; + + if (dip->di_label != DT_LBL_NONE && + dip->di_instr == DIF_INSTR_NOP) + continue; /* skip label declarations */ + + i++; /* advance dtdo_buf[] index */ + + if (DIF_INSTR_OP(dip->di_instr) == DIF_OP_XLATE || + DIF_INSTR_OP(dip->di_instr) == DIF_OP_XLARG) { + assert(dp->dtdo_buf[i - 1] == dip->di_instr); + dt_as_xlate(pcb, dp, i - 1, (uint_t) + (xlp++ - dp->dtdo_xlmtab), dip->di_extern); + continue; + } + + if ((idp = dip->di_extern) == NULL) + continue; /* no relocation entry needed */ + + if ((idp->di_flags & kmask) == kbits) { + nodef = knodef; + rp = krp++; + } else if ((idp->di_flags & umask) == ubits) { + nodef = unodef; + rp = urp++; + } else + continue; + + if (!nodef) + dt_as_undef(idp, i); + + assert(DIF_INSTR_OP(dip->di_instr) == DIF_OP_SETX); + soff = dt_strtab_insert(pcb->pcb_strtab, idp->di_name); + + if (soff == -1L) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + if (soff > DIF_STROFF_MAX) + longjmp(pcb->pcb_jmpbuf, EDT_STR2BIG); + + rp->dofr_name = (dof_stridx_t)soff; + rp->dofr_type = DOF_RELO_SETX; + rp->dofr_offset = DIF_INSTR_INTEGER(dip->di_instr) * + sizeof (uint64_t); + rp->dofr_data = 0; + } + + assert(krp == dp->dtdo_kreltab + dp->dtdo_krelen); + assert(urp == dp->dtdo_ureltab + dp->dtdo_urelen); + assert(xlp == dp->dtdo_xlmtab + dp->dtdo_xlmlen); + assert(i == dp->dtdo_len); + } + + /* + * Allocate memory for the compiled string table and then copy the + * chunks from the string table into the final string buffer. 
+ */ + if ((n = dt_strtab_size(pcb->pcb_strtab)) != 0) { + if ((dp->dtdo_strtab = dt_alloc(dtp, n)) == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + + (void) dt_strtab_write(pcb->pcb_strtab, + (dt_strtab_write_f *)dt_copystr, pcb); + dp->dtdo_strlen = (uint32_t)n; + } + + /* + * Allocate memory for the compiled integer table and then copy the + * integer constants from the table into the final integer buffer. + */ + if ((n = dt_inttab_size(pcb->pcb_inttab)) != 0) { + if ((dp->dtdo_inttab = dt_alloc(dtp, + n * sizeof (uint64_t))) == NULL) + longjmp(pcb->pcb_jmpbuf, EDT_NOMEM); + + dt_inttab_write(pcb->pcb_inttab, dp->dtdo_inttab); + dp->dtdo_intlen = (uint32_t)n; + } + + /* + * Fill in the DIFO return type from the type associated with the + * node saved in pcb_dret, and then clear pcb_difo and pcb_dret + * now that the assembler has completed successfully. + */ + dt_node_diftype(dtp, pcb->pcb_dret, &dp->dtdo_rtype); + pcb->pcb_difo = NULL; + pcb->pcb_dret = NULL; + + if (pcb->pcb_cflags & DTRACE_C_DIFV) + dt_dis(dp, stderr); + + return (dp); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_as.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_as.h new file mode 100644 index 0000000..2acd940 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_as.h @@ -0,0 +1,64 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DT_AS_H +#define _DT_AS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/dtrace.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct dt_irnode { + uint_t di_label; /* label number or DT_LBL_NONE */ + dif_instr_t di_instr; /* instruction opcode */ + void *di_extern; /* opcode-specific external reference */ + struct dt_irnode *di_next; /* next instruction */ +} dt_irnode_t; + +#define DT_LBL_NONE 0 /* no label on this instruction */ + +typedef struct dt_irlist { + dt_irnode_t *dl_list; /* pointer to first node in list */ + dt_irnode_t *dl_last; /* pointer to last node in list */ + uint_t dl_len; /* number of valid instructions */ + uint_t dl_label; /* next label number to assign */ +} dt_irlist_t; + +extern void dt_irlist_create(dt_irlist_t *); +extern void dt_irlist_destroy(dt_irlist_t *); +extern void dt_irlist_append(dt_irlist_t *, dt_irnode_t *); +extern uint_t dt_irlist_label(dt_irlist_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_AS_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_buf.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_buf.c new file mode 100644 index 0000000..324e778 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_buf.c @@ -0,0 +1,177 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * DTrace Memory Buffer Routines + * + * The routines in this file are used to create an automatically resizing + * memory buffer that can be written to like a file. Memory buffers are + * used to construct DOF to ioctl() to dtrace(7D), and provide semantics that + * simplify caller code. Specifically, any allocation errors result in an + * error code being set inside the buffer which is maintained persistently and + * propagates to another buffer if the buffer in error is concatenated. These + * semantics permit callers to execute a large series of writes without needing + * to check for errors and then perform a single check before using the buffer. 
+ */ + +#include <sys/sysmacros.h> +#include <strings.h> + +#include <dt_impl.h> +#include <dt_buf.h> + +void +dt_buf_create(dtrace_hdl_t *dtp, dt_buf_t *bp, const char *name, size_t len) +{ + if (len == 0) + len = _dtrace_bufsize; + + bp->dbu_buf = bp->dbu_ptr = dt_zalloc(dtp, len); + bp->dbu_len = len; + + if (bp->dbu_buf == NULL) + bp->dbu_err = dtrace_errno(dtp); + else + bp->dbu_err = 0; + + bp->dbu_resizes = 0; + bp->dbu_name = name; +} + +void +dt_buf_destroy(dtrace_hdl_t *dtp, dt_buf_t *bp) +{ + dt_dprintf("dt_buf_destroy(%s): size=%lu resizes=%u\n", + bp->dbu_name, (ulong_t)bp->dbu_len, bp->dbu_resizes); + + dt_free(dtp, bp->dbu_buf); +} + +void +dt_buf_reset(dtrace_hdl_t *dtp, dt_buf_t *bp) +{ + if ((bp->dbu_ptr = bp->dbu_buf) != NULL) + bp->dbu_err = 0; + else + dt_buf_create(dtp, bp, bp->dbu_name, bp->dbu_len); +} + +void +dt_buf_write(dtrace_hdl_t *dtp, dt_buf_t *bp, + const void *buf, size_t len, size_t align) +{ + size_t off = (size_t)(bp->dbu_ptr - bp->dbu_buf); + size_t adj = roundup(off, align) - off; + + if (bp->dbu_err != 0) { + (void) dt_set_errno(dtp, bp->dbu_err); + return; /* write silently fails */ + } + + if (bp->dbu_ptr + adj + len > bp->dbu_buf + bp->dbu_len) { + size_t new_len = bp->dbu_len * 2; + uchar_t *new_buf; + uint_t r = 1; + + while (bp->dbu_ptr + adj + len > bp->dbu_buf + new_len) { + new_len *= 2; + r++; + } + + if ((new_buf = dt_zalloc(dtp, new_len)) == NULL) { + bp->dbu_err = dtrace_errno(dtp); + return; + } + + bcopy(bp->dbu_buf, new_buf, off); + dt_free(dtp, bp->dbu_buf); + + bp->dbu_buf = new_buf; + bp->dbu_ptr = new_buf + off; + bp->dbu_len = new_len; + bp->dbu_resizes += r; + } + + bp->dbu_ptr += adj; + bcopy(buf, bp->dbu_ptr, len); + bp->dbu_ptr += len; +} + +void +dt_buf_concat(dtrace_hdl_t *dtp, dt_buf_t *dst, + const dt_buf_t *src, size_t align) +{ + if (dst->dbu_err == 0 && src->dbu_err != 0) { + (void) dt_set_errno(dtp, src->dbu_err); + dst->dbu_err = src->dbu_err; + } else { + dt_buf_write(dtp, dst, src->dbu_buf, 
+ (size_t)(src->dbu_ptr - src->dbu_buf), align); + } +} + +size_t +dt_buf_offset(const dt_buf_t *bp, size_t align) +{ + size_t off = (size_t)(bp->dbu_ptr - bp->dbu_buf); + return (roundup(off, align)); +} + +size_t +dt_buf_len(const dt_buf_t *bp) +{ + return (bp->dbu_ptr - bp->dbu_buf); +} + +int +dt_buf_error(const dt_buf_t *bp) +{ + return (bp->dbu_err); +} + +void * +dt_buf_ptr(const dt_buf_t *bp) +{ + return (bp->dbu_buf); +} + +void * +dt_buf_claim(dtrace_hdl_t *dtp, dt_buf_t *bp) +{ + void *buf = bp->dbu_buf; + + if (bp->dbu_err != 0) { + dt_free(dtp, buf); + buf = NULL; + } + + bp->dbu_buf = bp->dbu_ptr = NULL; + bp->dbu_len = 0; + + return (buf); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_buf.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_buf.h new file mode 100644 index 0000000..eb93e13 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_buf.h @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _DT_BUF_H +#define _DT_BUF_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <dtrace.h> + +typedef struct dt_buf { + const char *dbu_name; /* string name for debugging */ + uchar_t *dbu_buf; /* buffer base address */ + uchar_t *dbu_ptr; /* current buffer location */ + size_t dbu_len; /* buffer size in bytes */ + int dbu_err; /* errno value if error */ + int dbu_resizes; /* number of resizes */ +} dt_buf_t; + +extern void dt_buf_create(dtrace_hdl_t *, dt_buf_t *, const char *, size_t); +extern void dt_buf_destroy(dtrace_hdl_t *, dt_buf_t *); +extern void dt_buf_reset(dtrace_hdl_t *, dt_buf_t *); + +extern void dt_buf_write(dtrace_hdl_t *, dt_buf_t *, + const void *, size_t, size_t); + +extern void dt_buf_concat(dtrace_hdl_t *, dt_buf_t *, + const dt_buf_t *, size_t); + +extern size_t dt_buf_offset(const dt_buf_t *, size_t); +extern size_t dt_buf_len(const dt_buf_t *); + +extern int dt_buf_error(const dt_buf_t *); +extern void *dt_buf_ptr(const dt_buf_t *); + +extern void *dt_buf_claim(dtrace_hdl_t *, dt_buf_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_BUF_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_cc.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_cc.c new file mode 100644 index 0000000..35b8f02 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_cc.c @@ -0,0 +1,2631 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * DTrace D Language Compiler + * + * The code in this source file implements the main engine for the D language + * compiler. The driver routine for the compiler is dt_compile(), below. The + * compiler operates on either stdio FILEs or in-memory strings as its input + * and can produce either dtrace_prog_t structures from a D program or a single + * dtrace_difo_t structure from a D expression. Multiple entry points are + * provided as wrappers around dt_compile() for the various input/output pairs. + * The compiler itself is implemented across the following source files: + * + * dt_lex.l - lex scanner + * dt_grammar.y - yacc grammar + * dt_parser.c - parse tree creation and semantic checking + * dt_decl.c - declaration stack processing + * dt_xlator.c - D translator lookup and creation + * dt_ident.c - identifier and symbol table routines + * dt_pragma.c - #pragma processing and D pragmas + * dt_printf.c - D printf() and printa() argument checking and processing + * dt_cc.c - compiler driver and dtrace_prog_t construction + * dt_cg.c - DIF code generator + * dt_as.c - DIF assembler + * dt_dof.c - dtrace_prog_t -> DOF conversion + * + * Several other source files provide collections of utility routines used by + * these major files. 
The compiler itself is implemented in multiple passes: + * + * (1) The input program is scanned and parsed by dt_lex.l and dt_grammar.y + * and parse tree nodes are constructed using the routines in dt_parser.c. + * This node construction pass is described further in dt_parser.c. + * + * (2) The parse tree is "cooked" by assigning each clause a context (see the + * routine dt_setcontext(), below) based on its probe description and then + * recursively descending the tree performing semantic checking. The cook + * routines are also implemented in dt_parser.c and described there. + * + * (3) For actions that are DIF expression statements, the DIF code generator + * and assembler are invoked to create a finished DIFO for the statement. + * + * (4) The dtrace_prog_t data structures for the program clauses and actions + * are built, containing pointers to any DIFOs created in step (3). + * + * (5) The caller invokes a routine in dt_dof.c to convert the finished program + * into DOF format for use in anonymous tracing or enabling in the kernel. + * + * In the implementation, steps 2-4 are intertwined in that they are performed + * in order for each clause as part of a loop that executes over the clauses. + * + * The D compiler currently implements nearly no optimization. The compiler + * implements integer constant folding as part of pass (1), and a set of very + * simple peephole optimizations as part of pass (3). As with any C compiler, + * a large number of optimizations are possible on both the intermediate data + * structures and the generated DIF code. These possibilities should be + * investigated in the context of whether they will have any substantive effect + * on the overall DTrace probe effect before they are undertaken. 
+ */ + +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/sysmacros.h> + +#include <assert.h> +#include <string.h> +#include <strings.h> +#include <signal.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <ucontext.h> +#include <limits.h> +#include <ctype.h> +#include <dirent.h> +#include <dt_module.h> +#include <dt_program.h> +#include <dt_provider.h> +#include <dt_printf.h> +#include <dt_pid.h> +#include <dt_grammar.h> +#include <dt_ident.h> +#include <dt_string.h> +#include <dt_impl.h> + +static const dtrace_diftype_t dt_void_rtype = { + DIF_TYPE_CTF, CTF_K_INTEGER, 0, 0, 0 +}; + +static const dtrace_diftype_t dt_int_rtype = { + DIF_TYPE_CTF, CTF_K_INTEGER, 0, 0, sizeof (uint64_t) +}; + +static void *dt_compile(dtrace_hdl_t *, int, dtrace_probespec_t, void *, + uint_t, int, char *const[], FILE *, const char *); + + +/*ARGSUSED*/ +static int +dt_idreset(dt_idhash_t *dhp, dt_ident_t *idp, void *ignored) +{ + idp->di_flags &= ~(DT_IDFLG_REF | DT_IDFLG_MOD | + DT_IDFLG_DIFR | DT_IDFLG_DIFW); + return (0); +} + +/*ARGSUSED*/ +static int +dt_idpragma(dt_idhash_t *dhp, dt_ident_t *idp, void *ignored) +{ + yylineno = idp->di_lineno; + xyerror(D_PRAGMA_UNUSED, "unused #pragma %s\n", (char *)idp->di_iarg); + return (0); +} + +static dtrace_stmtdesc_t * +dt_stmt_create(dtrace_hdl_t *dtp, dtrace_ecbdesc_t *edp, + dtrace_attribute_t descattr, dtrace_attribute_t stmtattr) +{ + dtrace_stmtdesc_t *sdp = dtrace_stmt_create(dtp, edp); + + if (sdp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + assert(yypcb->pcb_stmt == NULL); + yypcb->pcb_stmt = sdp; + + sdp->dtsd_descattr = descattr; + sdp->dtsd_stmtattr = stmtattr; + + return (sdp); +} + +static dtrace_actdesc_t * +dt_stmt_action(dtrace_hdl_t *dtp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *new; + + if ((new = dtrace_stmt_action(dtp, sdp)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + return (new); +} + +/* + * Utility function to determine if a given 
action description is destructive. + * The dtdo_destructive bit is set for us by the DIF assembler (see dt_as.c). + */ +static int +dt_action_destructive(const dtrace_actdesc_t *ap) +{ + return (DTRACEACT_ISDESTRUCTIVE(ap->dtad_kind) || (ap->dtad_kind == + DTRACEACT_DIFEXPR && ap->dtad_difo->dtdo_destructive)); +} + +static void +dt_stmt_append(dtrace_stmtdesc_t *sdp, const dt_node_t *dnp) +{ + dtrace_ecbdesc_t *edp = sdp->dtsd_ecbdesc; + dtrace_actdesc_t *ap, *tap; + int commit = 0; + int speculate = 0; + int datarec = 0; + + /* + * Make sure that the new statement jibes with the rest of the ECB. + */ + for (ap = edp->dted_action; ap != NULL; ap = ap->dtad_next) { + if (ap->dtad_kind == DTRACEACT_COMMIT) { + if (commit) { + dnerror(dnp, D_COMM_COMM, "commit( ) may " + "not follow commit( )\n"); + } + + if (datarec) { + dnerror(dnp, D_COMM_DREC, "commit( ) may " + "not follow data-recording action(s)\n"); + } + + for (tap = ap; tap != NULL; tap = tap->dtad_next) { + if (!DTRACEACT_ISAGG(tap->dtad_kind)) + continue; + + dnerror(dnp, D_AGG_COMM, "aggregating actions " + "may not follow commit( )\n"); + } + + commit = 1; + continue; + } + + if (ap->dtad_kind == DTRACEACT_SPECULATE) { + if (speculate) { + dnerror(dnp, D_SPEC_SPEC, "speculate( ) may " + "not follow speculate( )\n"); + } + + if (commit) { + dnerror(dnp, D_SPEC_COMM, "speculate( ) may " + "not follow commit( )\n"); + } + + if (datarec) { + dnerror(dnp, D_SPEC_DREC, "speculate( ) may " + "not follow data-recording action(s)\n"); + } + + speculate = 1; + continue; + } + + if (DTRACEACT_ISAGG(ap->dtad_kind)) { + if (speculate) { + dnerror(dnp, D_AGG_SPEC, "aggregating actions " + "may not follow speculate( )\n"); + } + + datarec = 1; + continue; + } + + if (speculate) { + if (dt_action_destructive(ap)) { + dnerror(dnp, D_ACT_SPEC, "destructive actions " + "may not follow speculate( )\n"); + } + + if (ap->dtad_kind == DTRACEACT_EXIT) { + dnerror(dnp, D_EXIT_SPEC, "exit( ) may not " + "follow speculate( )\n"); 
+ } + } + + /* + * Exclude all non data-recording actions. + */ + if (dt_action_destructive(ap) || + ap->dtad_kind == DTRACEACT_DISCARD) + continue; + + if (ap->dtad_kind == DTRACEACT_DIFEXPR && + ap->dtad_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_CTF && + ap->dtad_difo->dtdo_rtype.dtdt_size == 0) + continue; + + if (commit) { + dnerror(dnp, D_DREC_COMM, "data-recording actions " + "may not follow commit( )\n"); + } + + if (!speculate) + datarec = 1; + } + + if (dtrace_stmt_add(yypcb->pcb_hdl, yypcb->pcb_prog, sdp) != 0) + longjmp(yypcb->pcb_jmpbuf, dtrace_errno(yypcb->pcb_hdl)); + + if (yypcb->pcb_stmt == sdp) + yypcb->pcb_stmt = NULL; +} + +/* + * For the first element of an aggregation tuple or for printa(), we create a + * simple DIF program that simply returns the immediate value that is the ID + * of the aggregation itself. This could be optimized in the future by + * creating a new in-kernel dtad_kind that just returns an integer. + */ +static void +dt_action_difconst(dtrace_actdesc_t *ap, uint_t id, dtrace_actkind_t kind) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dtrace_difo_t *dp = dt_zalloc(dtp, sizeof (dtrace_difo_t)); + + if (dp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dp->dtdo_buf = dt_alloc(dtp, sizeof (dif_instr_t) * 2); + dp->dtdo_inttab = dt_alloc(dtp, sizeof (uint64_t)); + + if (dp->dtdo_buf == NULL || dp->dtdo_inttab == NULL) { + dt_difo_free(dtp, dp); + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + dp->dtdo_buf[0] = DIF_INSTR_SETX(0, 1); /* setx DIF_INTEGER[0], %r1 */ + dp->dtdo_buf[1] = DIF_INSTR_RET(1); /* ret %r1 */ + dp->dtdo_len = 2; + dp->dtdo_inttab[0] = id; + dp->dtdo_intlen = 1; + dp->dtdo_rtype = dt_int_rtype; + + ap->dtad_difo = dp; + ap->dtad_kind = kind; +} + +static void +dt_action_clear(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dt_ident_t *aid; + dtrace_actdesc_t *ap; + dt_node_t *anp; + + char n[DT_TYPE_NAMELEN]; + int argc = 0; + + for (anp = dnp->dn_args; anp != NULL; anp = anp->dn_list) + 
argc++; /* count up arguments for error messages below */ + + if (argc != 1) { + dnerror(dnp, D_CLEAR_PROTO, + "%s( ) prototype mismatch: %d args passed, 1 expected\n", + dnp->dn_ident->di_name, argc); + } + + anp = dnp->dn_args; + assert(anp != NULL); + + if (anp->dn_kind != DT_NODE_AGG) { + dnerror(dnp, D_CLEAR_AGGARG, + "%s( ) argument #1 is incompatible with prototype:\n" + "\tprototype: aggregation\n\t argument: %s\n", + dnp->dn_ident->di_name, + dt_node_type_name(anp, n, sizeof (n))); + } + + aid = anp->dn_ident; + + if (aid->di_gen == dtp->dt_gen && !(aid->di_flags & DT_IDFLG_MOD)) { + dnerror(dnp, D_CLEAR_AGGBAD, + "undefined aggregation: @%s\n", aid->di_name); + } + + ap = dt_stmt_action(dtp, sdp); + dt_action_difconst(ap, anp->dn_ident->di_id, DTRACEACT_LIBACT); + ap->dtad_arg = DT_ACT_CLEAR; +} + +static void +dt_action_normalize(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dt_ident_t *aid; + dtrace_actdesc_t *ap; + dt_node_t *anp, *normal; + int denormal = (strcmp(dnp->dn_ident->di_name, "denormalize") == 0); + + char n[DT_TYPE_NAMELEN]; + int argc = 0; + + for (anp = dnp->dn_args; anp != NULL; anp = anp->dn_list) + argc++; /* count up arguments for error messages below */ + + if ((denormal && argc != 1) || (!denormal && argc != 2)) { + dnerror(dnp, D_NORMALIZE_PROTO, + "%s( ) prototype mismatch: %d args passed, %d expected\n", + dnp->dn_ident->di_name, argc, denormal ? 
1 : 2); + } + + anp = dnp->dn_args; + assert(anp != NULL); + + if (anp->dn_kind != DT_NODE_AGG) { + dnerror(dnp, D_NORMALIZE_AGGARG, + "%s( ) argument #1 is incompatible with prototype:\n" + "\tprototype: aggregation\n\t argument: %s\n", + dnp->dn_ident->di_name, + dt_node_type_name(anp, n, sizeof (n))); + } + + if ((normal = anp->dn_list) != NULL && !dt_node_is_scalar(normal)) { + dnerror(dnp, D_NORMALIZE_SCALAR, + "%s( ) argument #2 must be of scalar type\n", + dnp->dn_ident->di_name); + } + + aid = anp->dn_ident; + + if (aid->di_gen == dtp->dt_gen && !(aid->di_flags & DT_IDFLG_MOD)) { + dnerror(dnp, D_NORMALIZE_AGGBAD, + "undefined aggregation: @%s\n", aid->di_name); + } + + ap = dt_stmt_action(dtp, sdp); + dt_action_difconst(ap, anp->dn_ident->di_id, DTRACEACT_LIBACT); + + if (denormal) { + ap->dtad_arg = DT_ACT_DENORMALIZE; + return; + } + + ap->dtad_arg = DT_ACT_NORMALIZE; + + assert(normal != NULL); + ap = dt_stmt_action(dtp, sdp); + dt_cg(yypcb, normal); + + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_LIBACT; + ap->dtad_arg = DT_ACT_NORMALIZE; +} + +static void +dt_action_trunc(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dt_ident_t *aid; + dtrace_actdesc_t *ap; + dt_node_t *anp, *trunc; + + char n[DT_TYPE_NAMELEN]; + int argc = 0; + + for (anp = dnp->dn_args; anp != NULL; anp = anp->dn_list) + argc++; /* count up arguments for error messages below */ + + if (argc > 2 || argc < 1) { + dnerror(dnp, D_TRUNC_PROTO, + "%s( ) prototype mismatch: %d args passed, %s expected\n", + dnp->dn_ident->di_name, argc, + argc < 1 ? 
"at least 1" : "no more than 2"); + } + + anp = dnp->dn_args; + assert(anp != NULL); + trunc = anp->dn_list; + + if (anp->dn_kind != DT_NODE_AGG) { + dnerror(dnp, D_TRUNC_AGGARG, + "%s( ) argument #1 is incompatible with prototype:\n" + "\tprototype: aggregation\n\t argument: %s\n", + dnp->dn_ident->di_name, + dt_node_type_name(anp, n, sizeof (n))); + } + + if (argc == 2) { + assert(trunc != NULL); + if (!dt_node_is_scalar(trunc)) { + dnerror(dnp, D_TRUNC_SCALAR, + "%s( ) argument #2 must be of scalar type\n", + dnp->dn_ident->di_name); + } + } + + aid = anp->dn_ident; + + if (aid->di_gen == dtp->dt_gen && !(aid->di_flags & DT_IDFLG_MOD)) { + dnerror(dnp, D_TRUNC_AGGBAD, + "undefined aggregation: @%s\n", aid->di_name); + } + + ap = dt_stmt_action(dtp, sdp); + dt_action_difconst(ap, anp->dn_ident->di_id, DTRACEACT_LIBACT); + ap->dtad_arg = DT_ACT_TRUNC; + + ap = dt_stmt_action(dtp, sdp); + + if (argc == 1) { + dt_action_difconst(ap, 0, DTRACEACT_LIBACT); + } else { + assert(trunc != NULL); + dt_cg(yypcb, trunc); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_LIBACT; + } + + ap->dtad_arg = DT_ACT_TRUNC; +} + +static void +dt_action_printa(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dt_ident_t *aid, *fid; + dtrace_actdesc_t *ap; + const char *format; + dt_node_t *anp, *proto = NULL; + + char n[DT_TYPE_NAMELEN]; + int argc = 0, argr = 0; + + for (anp = dnp->dn_args; anp != NULL; anp = anp->dn_list) + argc++; /* count up arguments for error messages below */ + + switch (dnp->dn_args->dn_kind) { + case DT_NODE_STRING: + format = dnp->dn_args->dn_string; + anp = dnp->dn_args->dn_list; + argr = 2; + break; + case DT_NODE_AGG: + format = NULL; + anp = dnp->dn_args; + argr = 1; + break; + default: + format = NULL; + anp = dnp->dn_args; + argr = 1; + } + + if (argc < argr) { + dnerror(dnp, D_PRINTA_PROTO, + "%s( ) prototype mismatch: %d args passed, %d expected\n", + dnp->dn_ident->di_name, argc, argr); + } + + assert(anp != NULL); + + while 
(anp != NULL) { + if (anp->dn_kind != DT_NODE_AGG) { + dnerror(dnp, D_PRINTA_AGGARG, + "%s( ) argument #%d is incompatible with " + "prototype:\n\tprototype: aggregation\n" + "\t argument: %s\n", dnp->dn_ident->di_name, argr, + dt_node_type_name(anp, n, sizeof (n))); + } + + aid = anp->dn_ident; + fid = aid->di_iarg; + + if (aid->di_gen == dtp->dt_gen && + !(aid->di_flags & DT_IDFLG_MOD)) { + dnerror(dnp, D_PRINTA_AGGBAD, + "undefined aggregation: @%s\n", aid->di_name); + } + + /* + * If we have multiple aggregations, we must be sure that + * their key signatures match. + */ + if (proto != NULL) { + dt_printa_validate(proto, anp); + } else { + proto = anp; + } + + if (format != NULL) { + yylineno = dnp->dn_line; + + sdp->dtsd_fmtdata = + dt_printf_create(yypcb->pcb_hdl, format); + dt_printf_validate(sdp->dtsd_fmtdata, + DT_PRINTF_AGGREGATION, dnp->dn_ident, 1, + fid->di_id, ((dt_idsig_t *)aid->di_data)->dis_args); + format = NULL; + } + + ap = dt_stmt_action(dtp, sdp); + dt_action_difconst(ap, anp->dn_ident->di_id, DTRACEACT_PRINTA); + + anp = anp->dn_list; + argr++; + } +} + +static void +dt_action_printflike(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp, + dtrace_actkind_t kind) +{ + dt_node_t *anp, *arg1; + dtrace_actdesc_t *ap = NULL; + char n[DT_TYPE_NAMELEN], *str; + + assert(DTRACEACT_ISPRINTFLIKE(kind)); + + if (dnp->dn_args->dn_kind != DT_NODE_STRING) { + dnerror(dnp, D_PRINTF_ARG_FMT, + "%s( ) argument #1 is incompatible with prototype:\n" + "\tprototype: string constant\n\t argument: %s\n", + dnp->dn_ident->di_name, + dt_node_type_name(dnp->dn_args, n, sizeof (n))); + } + + arg1 = dnp->dn_args->dn_list; + yylineno = dnp->dn_line; + str = dnp->dn_args->dn_string; + + + /* + * If this is an freopen(), we use an empty string to denote that + * stdout should be restored. 
For other printf()-like actions, an + * empty format string is illegal: an empty format string would + * result in malformed DOF, and the compiler thus flags an empty + * format string as a compile-time error. To avoid propagating the + * freopen() special case throughout the system, we simply transpose + * an empty string into a sentinel string (DT_FREOPEN_RESTORE) that + * denotes that stdout should be restored. + */ + if (kind == DTRACEACT_FREOPEN) { + if (strcmp(str, DT_FREOPEN_RESTORE) == 0) { + /* + * Our sentinel is always an invalid argument to + * freopen(), but if it's been manually specified, we + * must fail now instead of when the freopen() is + * actually evaluated. + */ + dnerror(dnp, D_FREOPEN_INVALID, + "%s( ) argument #1 cannot be \"%s\"\n", + dnp->dn_ident->di_name, DT_FREOPEN_RESTORE); + } + + if (str[0] == '\0') + str = DT_FREOPEN_RESTORE; + } + + sdp->dtsd_fmtdata = dt_printf_create(dtp, str); + + dt_printf_validate(sdp->dtsd_fmtdata, DT_PRINTF_EXACTLEN, + dnp->dn_ident, 1, DTRACEACT_AGGREGATION, arg1); + + if (arg1 == NULL) { + dif_instr_t *dbuf; + dtrace_difo_t *dp; + + if ((dbuf = dt_alloc(dtp, sizeof (dif_instr_t))) == NULL || + (dp = dt_zalloc(dtp, sizeof (dtrace_difo_t))) == NULL) { + dt_free(dtp, dbuf); + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + dbuf[0] = DIF_INSTR_RET(DIF_REG_R0); /* ret %r0 */ + + dp->dtdo_buf = dbuf; + dp->dtdo_len = 1; + dp->dtdo_rtype = dt_int_rtype; + + ap = dt_stmt_action(dtp, sdp); + ap->dtad_difo = dp; + ap->dtad_kind = kind; + return; + } + + for (anp = arg1; anp != NULL; anp = anp->dn_list) { + ap = dt_stmt_action(dtp, sdp); + dt_cg(yypcb, anp); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = kind; + } +} + +static void +dt_action_trace(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + int ctflib; + + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + boolean_t istrace = (dnp->dn_ident->di_id == DT_ACT_TRACE); + const char *act = istrace ? 
"trace" : "print"; + + if (dt_node_is_void(dnp->dn_args)) { + dnerror(dnp->dn_args, istrace ? D_TRACE_VOID : D_PRINT_VOID, + "%s( ) may not be applied to a void expression\n", act); + } + + if (dt_node_resolve(dnp->dn_args, DT_IDENT_XLPTR) != NULL) { + dnerror(dnp->dn_args, istrace ? D_TRACE_DYN : D_PRINT_DYN, + "%s( ) may not be applied to a translated pointer\n", act); + } + + if (dnp->dn_args->dn_kind == DT_NODE_AGG) { + dnerror(dnp->dn_args, istrace ? D_TRACE_AGG : D_PRINT_AGG, + "%s( ) may not be applied to an aggregation%s\n", act, + istrace ? "" : " -- did you mean printa()?"); + } + + dt_cg(yypcb, dnp->dn_args); + + /* + * The print() action behaves identically to trace(), except that it + * stores the CTF type of the argument (if present) within the DOF for + * the DIFEXPR action. To do this, we set the 'dtsd_strdata' to point + * to the fully-qualified CTF type ID for the result of the DIF + * action. We use the ID instead of the name to handles complex types + * like arrays and function pointers that can't be resolved by + * ctf_type_lookup(). This is later processed by dtrace_dof_create() + * and turned into a reference into the string table so that we can + * get the type information when we process the data after the fact. In + * the case where we are referring to userland CTF data, we also need to + * to identify which ctf container in question we care about and encode + * that within the name. 
+ */ + if (dnp->dn_ident->di_id == DT_ACT_PRINT) { + dt_node_t *dret; + size_t n; + dt_module_t *dmp; + + dret = yypcb->pcb_dret; + dmp = dt_module_lookup_by_ctf(dtp, dret->dn_ctfp); + + n = snprintf(NULL, 0, "%s`%ld", dmp->dm_name, dret->dn_type) + 1; + if (dmp->dm_pid != 0) { + ctflib = dt_module_getlibid(dtp, dmp, dret->dn_ctfp); + assert(ctflib >= 0); + n = snprintf(NULL, 0, "%s`%d`%ld", dmp->dm_name, + ctflib, dret->dn_type) + 1; + } else { + n = snprintf(NULL, 0, "%s`%ld", dmp->dm_name, + dret->dn_type) + 1; + } + sdp->dtsd_strdata = dt_alloc(dtp, n); + if (sdp->dtsd_strdata == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + (void) snprintf(sdp->dtsd_strdata, n, "%s`%ld", dmp->dm_name, + dret->dn_type); + if (dmp->dm_pid != 0) { + (void) snprintf(sdp->dtsd_strdata, n, "%s`%d`%ld", + dmp->dm_name, ctflib, dret->dn_type); + } else { + (void) snprintf(sdp->dtsd_strdata, n, "%s`%ld", + dmp->dm_name, dret->dn_type); + } + } + + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_DIFEXPR; +} + +static void +dt_action_tracemem(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_node_t *addr = dnp->dn_args; + dt_node_t *max = dnp->dn_args->dn_list; + dt_node_t *size; + + char n[DT_TYPE_NAMELEN]; + + if (dt_node_is_integer(addr) == 0 && dt_node_is_pointer(addr) == 0) { + dnerror(addr, D_TRACEMEM_ADDR, + "tracemem( ) argument #1 is incompatible with " + "prototype:\n\tprototype: pointer or integer\n" + "\t argument: %s\n", + dt_node_type_name(addr, n, sizeof (n))); + } + + if (dt_node_is_posconst(max) == 0) { + dnerror(max, D_TRACEMEM_SIZE, "tracemem( ) argument #2 must " + "be a non-zero positive integral constant expression\n"); + } + + if ((size = max->dn_list) != NULL) { + if (size->dn_list != NULL) { + dnerror(size, D_TRACEMEM_ARGS, "tracemem ( ) prototype " + "mismatch: expected at most 3 args\n"); + } + + if (!dt_node_is_scalar(size)) { + dnerror(size, D_TRACEMEM_DYNSIZE, "tracemem ( ) " 
+ "dynamic size (argument #3) must be of " + "scalar type\n"); + } + + dt_cg(yypcb, size); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_difo->dtdo_rtype = dt_int_rtype; + ap->dtad_kind = DTRACEACT_TRACEMEM_DYNSIZE; + + ap = dt_stmt_action(dtp, sdp); + } + + dt_cg(yypcb, addr); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_TRACEMEM; + + ap->dtad_difo->dtdo_rtype.dtdt_flags |= DIF_TF_BYREF; + ap->dtad_difo->dtdo_rtype.dtdt_size = max->dn_value; +} + +static void +dt_action_stack_args(dtrace_hdl_t *dtp, dtrace_actdesc_t *ap, dt_node_t *arg0) +{ + ap->dtad_kind = DTRACEACT_STACK; + + if (dtp->dt_options[DTRACEOPT_STACKFRAMES] != DTRACEOPT_UNSET) { + ap->dtad_arg = dtp->dt_options[DTRACEOPT_STACKFRAMES]; + } else { + ap->dtad_arg = 0; + } + + if (arg0 != NULL) { + if (arg0->dn_list != NULL) { + dnerror(arg0, D_STACK_PROTO, "stack( ) prototype " + "mismatch: too many arguments\n"); + } + + if (dt_node_is_posconst(arg0) == 0) { + dnerror(arg0, D_STACK_SIZE, "stack( ) size must be a " + "non-zero positive integral constant expression\n"); + } + + ap->dtad_arg = arg0->dn_value; + } +} + +static void +dt_action_stack(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + dt_action_stack_args(dtp, ap, dnp->dn_args); +} + +static void +dt_action_ustack_args(dtrace_hdl_t *dtp, dtrace_actdesc_t *ap, dt_node_t *dnp) +{ + uint32_t nframes = 0; + uint32_t strsize = 0; /* default string table size */ + dt_node_t *arg0 = dnp->dn_args; + dt_node_t *arg1 = arg0 != NULL ? 
arg0->dn_list : NULL; + + assert(dnp->dn_ident->di_id == DT_ACT_JSTACK || + dnp->dn_ident->di_id == DT_ACT_USTACK); + + if (dnp->dn_ident->di_id == DT_ACT_JSTACK) { + if (dtp->dt_options[DTRACEOPT_JSTACKFRAMES] != DTRACEOPT_UNSET) + nframes = dtp->dt_options[DTRACEOPT_JSTACKFRAMES]; + + if (dtp->dt_options[DTRACEOPT_JSTACKSTRSIZE] != DTRACEOPT_UNSET) + strsize = dtp->dt_options[DTRACEOPT_JSTACKSTRSIZE]; + + ap->dtad_kind = DTRACEACT_JSTACK; + } else { + assert(dnp->dn_ident->di_id == DT_ACT_USTACK); + + if (dtp->dt_options[DTRACEOPT_USTACKFRAMES] != DTRACEOPT_UNSET) + nframes = dtp->dt_options[DTRACEOPT_USTACKFRAMES]; + + ap->dtad_kind = DTRACEACT_USTACK; + } + + if (arg0 != NULL) { + if (!dt_node_is_posconst(arg0)) { + dnerror(arg0, D_USTACK_FRAMES, "ustack( ) argument #1 " + "must be a non-zero positive integer constant\n"); + } + nframes = (uint32_t)arg0->dn_value; + } + + if (arg1 != NULL) { + if (arg1->dn_kind != DT_NODE_INT || + ((arg1->dn_flags & DT_NF_SIGNED) && + (int64_t)arg1->dn_value < 0)) { + dnerror(arg1, D_USTACK_STRSIZE, "ustack( ) argument #2 " + "must be a positive integer constant\n"); + } + + if (arg1->dn_list != NULL) { + dnerror(arg1, D_USTACK_PROTO, "ustack( ) prototype " + "mismatch: too many arguments\n"); + } + + strsize = (uint32_t)arg1->dn_value; + } + + ap->dtad_arg = DTRACE_USTACK_ARG(nframes, strsize); +} + +static void +dt_action_ustack(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + dt_action_ustack_args(dtp, ap, dnp); +} + +static void +dt_action_setopt(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap; + dt_node_t *arg0, *arg1; + + /* + * The prototype guarantees that we are called with either one or + * two arguments, and that any arguments that are present are strings. 
+ */ + arg0 = dnp->dn_args; + arg1 = arg0->dn_list; + + ap = dt_stmt_action(dtp, sdp); + dt_cg(yypcb, arg0); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_LIBACT; + ap->dtad_arg = DT_ACT_SETOPT; + + ap = dt_stmt_action(dtp, sdp); + + if (arg1 == NULL) { + dt_action_difconst(ap, 0, DTRACEACT_LIBACT); + } else { + dt_cg(yypcb, arg1); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_LIBACT; + } + + ap->dtad_arg = DT_ACT_SETOPT; +} + +/*ARGSUSED*/ +static void +dt_action_symmod_args(dtrace_hdl_t *dtp, dtrace_actdesc_t *ap, + dt_node_t *dnp, dtrace_actkind_t kind) +{ + assert(kind == DTRACEACT_SYM || kind == DTRACEACT_MOD || + kind == DTRACEACT_USYM || kind == DTRACEACT_UMOD || + kind == DTRACEACT_UADDR); + + dt_cg(yypcb, dnp); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = kind; + ap->dtad_difo->dtdo_rtype.dtdt_size = sizeof (uint64_t); +} + +static void +dt_action_symmod(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp, + dtrace_actkind_t kind) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + dt_action_symmod_args(dtp, ap, dnp->dn_args, kind); +} + +/*ARGSUSED*/ +static void +dt_action_ftruncate(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + /* + * Library actions need a DIFO that serves as an argument. As + * ftruncate() doesn't take an argument, we generate the constant 0 + * in a DIFO; this constant will be ignored when the ftruncate() is + * processed. 
+ */ + dt_action_difconst(ap, 0, DTRACEACT_LIBACT); + ap->dtad_arg = DT_ACT_FTRUNCATE; +} + +/*ARGSUSED*/ +static void +dt_action_stop(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + ap->dtad_kind = DTRACEACT_STOP; + ap->dtad_arg = 0; +} + +/*ARGSUSED*/ +static void +dt_action_breakpoint(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + ap->dtad_kind = DTRACEACT_BREAKPOINT; + ap->dtad_arg = 0; +} + +/*ARGSUSED*/ +static void +dt_action_panic(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + ap->dtad_kind = DTRACEACT_PANIC; + ap->dtad_arg = 0; +} + +static void +dt_action_chill(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_args); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_CHILL; +} + +static void +dt_action_raise(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_args); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_RAISE; +} + +static void +dt_action_exit(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_args); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_EXIT; + ap->dtad_difo->dtdo_rtype.dtdt_size = sizeof (int); +} + +static void +dt_action_speculate(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_args); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_SPECULATE; +} + +static void +dt_action_printm(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_node_t *size = dnp->dn_args; + dt_node_t 
*addr = dnp->dn_args->dn_list; + + char n[DT_TYPE_NAMELEN]; + + if (dt_node_is_posconst(size) == 0) { + dnerror(size, D_PRINTM_SIZE, "printm( ) argument #1 must " + "be a non-zero positive integral constant expression\n"); + } + + if (dt_node_is_pointer(addr) == 0) { + dnerror(addr, D_PRINTM_ADDR, + "printm( ) argument #2 is incompatible with " + "prototype:\n\tprototype: pointer\n" + "\t argument: %s\n", + dt_node_type_name(addr, n, sizeof (n))); + } + + dt_cg(yypcb, addr); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_PRINTM; + + ap->dtad_difo->dtdo_rtype.dtdt_flags |= DIF_TF_BYREF; + ap->dtad_difo->dtdo_rtype.dtdt_size = size->dn_value + sizeof(uintptr_t); +} + +static void +dt_action_printt(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_node_t *size = dnp->dn_args; + dt_node_t *addr = dnp->dn_args->dn_list; + + char n[DT_TYPE_NAMELEN]; + + if (dt_node_is_posconst(size) == 0) { + dnerror(size, D_PRINTT_SIZE, "printt( ) argument #1 must " + "be a non-zero positive integral constant expression\n"); + } + + if (addr == NULL || addr->dn_kind != DT_NODE_FUNC || + addr->dn_ident != dt_idhash_lookup(dtp->dt_globals, "typeref")) { + dnerror(addr, D_PRINTT_ADDR, + "printt( ) argument #2 is incompatible with " + "prototype:\n\tprototype: typeref()\n" + "\t argument: %s\n", + dt_node_type_name(addr, n, sizeof (n))); + } + + dt_cg(yypcb, addr); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_PRINTT; + + ap->dtad_difo->dtdo_rtype.dtdt_flags |= DIF_TF_BYREF; + + /* + * Allow additional buffer space for the data size, type size, + * type string length and a stab in the dark (32 bytes) for the + * type string. The type string is part of the typeref() that + * this action references. 
+ */ + ap->dtad_difo->dtdo_rtype.dtdt_size = size->dn_value + 3 * sizeof(uintptr_t) + 32; + +} + +static void +dt_action_commit(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_args); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_COMMIT; +} + +static void +dt_action_discard(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_args); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_DISCARD; +} + +static void +dt_compile_fun(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + switch (dnp->dn_expr->dn_ident->di_id) { + case DT_ACT_BREAKPOINT: + dt_action_breakpoint(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_CHILL: + dt_action_chill(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_CLEAR: + dt_action_clear(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_COMMIT: + dt_action_commit(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_DENORMALIZE: + dt_action_normalize(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_DISCARD: + dt_action_discard(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_EXIT: + dt_action_exit(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_FREOPEN: + dt_action_printflike(dtp, dnp->dn_expr, sdp, DTRACEACT_FREOPEN); + break; + case DT_ACT_FTRUNCATE: + dt_action_ftruncate(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_MOD: + dt_action_symmod(dtp, dnp->dn_expr, sdp, DTRACEACT_MOD); + break; + case DT_ACT_NORMALIZE: + dt_action_normalize(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_PANIC: + dt_action_panic(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_PRINT: + dt_action_trace(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_PRINTA: + dt_action_printa(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_PRINTF: + dt_action_printflike(dtp, dnp->dn_expr, sdp, DTRACEACT_PRINTF); + break; + case DT_ACT_PRINTM: + dt_action_printm(dtp, dnp->dn_expr, sdp); + 
break; + case DT_ACT_PRINTT: + dt_action_printt(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_RAISE: + dt_action_raise(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_SETOPT: + dt_action_setopt(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_SPECULATE: + dt_action_speculate(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_STACK: + dt_action_stack(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_STOP: + dt_action_stop(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_SYM: + dt_action_symmod(dtp, dnp->dn_expr, sdp, DTRACEACT_SYM); + break; + case DT_ACT_SYSTEM: + dt_action_printflike(dtp, dnp->dn_expr, sdp, DTRACEACT_SYSTEM); + break; + case DT_ACT_TRACE: + dt_action_trace(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_TRACEMEM: + dt_action_tracemem(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_TRUNC: + dt_action_trunc(dtp, dnp->dn_expr, sdp); + break; + case DT_ACT_UADDR: + dt_action_symmod(dtp, dnp->dn_expr, sdp, DTRACEACT_UADDR); + break; + case DT_ACT_UMOD: + dt_action_symmod(dtp, dnp->dn_expr, sdp, DTRACEACT_UMOD); + break; + case DT_ACT_USYM: + dt_action_symmod(dtp, dnp->dn_expr, sdp, DTRACEACT_USYM); + break; + case DT_ACT_USTACK: + case DT_ACT_JSTACK: + dt_action_ustack(dtp, dnp->dn_expr, sdp); + break; + default: + dnerror(dnp->dn_expr, D_UNKNOWN, "tracing function %s( ) is " + "not yet supported\n", dnp->dn_expr->dn_ident->di_name); + } +} + +static void +dt_compile_exp(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *ap = dt_stmt_action(dtp, sdp); + + dt_cg(yypcb, dnp->dn_expr); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_difo->dtdo_rtype = dt_void_rtype; + ap->dtad_kind = DTRACEACT_DIFEXPR; +} + +static void +dt_compile_agg(dtrace_hdl_t *dtp, dt_node_t *dnp, dtrace_stmtdesc_t *sdp) +{ + dt_ident_t *aid, *fid; + dt_node_t *anp, *incr = NULL; + dtrace_actdesc_t *ap; + uint_t n = 1, argmax; + uint64_t arg = 0; + + /* + * If the aggregation has no aggregating function applied to it, then + * this statement has no effect. 
Flag this as a programming error. + */ + if (dnp->dn_aggfun == NULL) { + dnerror(dnp, D_AGG_NULL, "expression has null effect: @%s\n", + dnp->dn_ident->di_name); + } + + aid = dnp->dn_ident; + fid = dnp->dn_aggfun->dn_ident; + + if (dnp->dn_aggfun->dn_args != NULL && + dt_node_is_scalar(dnp->dn_aggfun->dn_args) == 0) { + dnerror(dnp->dn_aggfun, D_AGG_SCALAR, "%s( ) argument #1 must " + "be of scalar type\n", fid->di_name); + } + + /* + * The ID of the aggregation itself is implicitly recorded as the first + * member of each aggregation tuple so we can distinguish them later. + */ + ap = dt_stmt_action(dtp, sdp); + dt_action_difconst(ap, aid->di_id, DTRACEACT_DIFEXPR); + + for (anp = dnp->dn_aggtup; anp != NULL; anp = anp->dn_list) { + ap = dt_stmt_action(dtp, sdp); + n++; + + if (anp->dn_kind == DT_NODE_FUNC) { + if (anp->dn_ident->di_id == DT_ACT_STACK) { + dt_action_stack_args(dtp, ap, anp->dn_args); + continue; + } + + if (anp->dn_ident->di_id == DT_ACT_USTACK || + anp->dn_ident->di_id == DT_ACT_JSTACK) { + dt_action_ustack_args(dtp, ap, anp); + continue; + } + + switch (anp->dn_ident->di_id) { + case DT_ACT_UADDR: + dt_action_symmod_args(dtp, ap, + anp->dn_args, DTRACEACT_UADDR); + continue; + + case DT_ACT_USYM: + dt_action_symmod_args(dtp, ap, + anp->dn_args, DTRACEACT_USYM); + continue; + + case DT_ACT_UMOD: + dt_action_symmod_args(dtp, ap, + anp->dn_args, DTRACEACT_UMOD); + continue; + + case DT_ACT_SYM: + dt_action_symmod_args(dtp, ap, + anp->dn_args, DTRACEACT_SYM); + continue; + + case DT_ACT_MOD: + dt_action_symmod_args(dtp, ap, + anp->dn_args, DTRACEACT_MOD); + continue; + + default: + break; + } + } + + dt_cg(yypcb, anp); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_kind = DTRACEACT_DIFEXPR; + } + + if (fid->di_id == DTRACEAGG_LQUANTIZE) { + /* + * For linear quantization, we have between two and four + * arguments in addition to the expression: + * + * arg1 => Base value + * arg2 => Limit value + * arg3 => Quantization level step size (defaults to 1) + 
* arg4 => Quantization increment value (defaults to 1) + */ + dt_node_t *arg1 = dnp->dn_aggfun->dn_args->dn_list; + dt_node_t *arg2 = arg1->dn_list; + dt_node_t *arg3 = arg2->dn_list; + dt_idsig_t *isp; + uint64_t nlevels, step = 1, oarg; + int64_t baseval, limitval; + + if (arg1->dn_kind != DT_NODE_INT) { + dnerror(arg1, D_LQUANT_BASETYPE, "lquantize( ) " + "argument #1 must be an integer constant\n"); + } + + baseval = (int64_t)arg1->dn_value; + + if (baseval < INT32_MIN || baseval > INT32_MAX) { + dnerror(arg1, D_LQUANT_BASEVAL, "lquantize( ) " + "argument #1 must be a 32-bit quantity\n"); + } + + if (arg2->dn_kind != DT_NODE_INT) { + dnerror(arg2, D_LQUANT_LIMTYPE, "lquantize( ) " + "argument #2 must be an integer constant\n"); + } + + limitval = (int64_t)arg2->dn_value; + + if (limitval < INT32_MIN || limitval > INT32_MAX) { + dnerror(arg2, D_LQUANT_LIMVAL, "lquantize( ) " + "argument #2 must be a 32-bit quantity\n"); + } + + if (limitval < baseval) { + dnerror(dnp, D_LQUANT_MISMATCH, + "lquantize( ) base (argument #1) must be less " + "than limit (argument #2)\n"); + } + + if (arg3 != NULL) { + if (!dt_node_is_posconst(arg3)) { + dnerror(arg3, D_LQUANT_STEPTYPE, "lquantize( ) " + "argument #3 must be a non-zero positive " + "integer constant\n"); + } + + if ((step = arg3->dn_value) > UINT16_MAX) { + dnerror(arg3, D_LQUANT_STEPVAL, "lquantize( ) " + "argument #3 must be a 16-bit quantity\n"); + } + } + + nlevels = (limitval - baseval) / step; + + if (nlevels == 0) { + dnerror(dnp, D_LQUANT_STEPLARGE, + "lquantize( ) step (argument #3) too large: must " + "have at least one quantization level\n"); + } + + if (nlevels > UINT16_MAX) { + dnerror(dnp, D_LQUANT_STEPSMALL, "lquantize( ) step " + "(argument #3) too small: number of quantization " + "levels must be a 16-bit quantity\n"); + } + + arg = (step << DTRACE_LQUANTIZE_STEPSHIFT) | + (nlevels << DTRACE_LQUANTIZE_LEVELSHIFT) | + ((baseval << DTRACE_LQUANTIZE_BASESHIFT) & + DTRACE_LQUANTIZE_BASEMASK); + + 
assert(arg != 0); + + isp = (dt_idsig_t *)aid->di_data; + + if (isp->dis_auxinfo == 0) { + /* + * This is the first time we've seen an lquantize() + * for this aggregation; we'll store our argument + * as the auxiliary signature information. + */ + isp->dis_auxinfo = arg; + } else if ((oarg = isp->dis_auxinfo) != arg) { + /* + * If we have seen this lquantize() before and the + * argument doesn't match the original argument, pick + * the original argument apart to concisely report the + * mismatch. + */ + int obaseval = DTRACE_LQUANTIZE_BASE(oarg); + int onlevels = DTRACE_LQUANTIZE_LEVELS(oarg); + int ostep = DTRACE_LQUANTIZE_STEP(oarg); + + if (obaseval != baseval) { + dnerror(dnp, D_LQUANT_MATCHBASE, "lquantize( ) " + "base (argument #1) doesn't match previous " + "declaration: expected %d, found %d\n", + obaseval, (int)baseval); + } + + if (onlevels * ostep != nlevels * step) { + dnerror(dnp, D_LQUANT_MATCHLIM, "lquantize( ) " + "limit (argument #2) doesn't match previous" + " declaration: expected %d, found %d\n", + obaseval + onlevels * ostep, + (int)baseval + (int)nlevels * (int)step); + } + + if (ostep != step) { + dnerror(dnp, D_LQUANT_MATCHSTEP, "lquantize( ) " + "step (argument #3) doesn't match previous " + "declaration: expected %d, found %d\n", + ostep, (int)step); + } + + /* + * We shouldn't be able to get here -- one of the + * parameters must be mismatched if the arguments + * didn't match. + */ + assert(0); + } + + incr = arg3 != NULL ? 
arg3->dn_list : NULL; + argmax = 5; + } + + if (fid->di_id == DTRACEAGG_LLQUANTIZE) { + /* + * For log/linear quantizations, we have between one and five + * arguments in addition to the expression: + * + * arg1 => Factor + * arg2 => Low magnitude + * arg3 => High magnitude + * arg4 => Number of steps per magnitude + * arg5 => Quantization increment value (defaults to 1) + */ + dt_node_t *llarg = dnp->dn_aggfun->dn_args->dn_list; + uint64_t oarg, order, v; + dt_idsig_t *isp; + int i; + + struct { + char *str; /* string identifier */ + int badtype; /* error on bad type */ + int badval; /* error on bad value */ + int mismatch; /* error on bad match */ + int shift; /* shift value */ + uint16_t value; /* value itself */ + } args[] = { + { "factor", D_LLQUANT_FACTORTYPE, + D_LLQUANT_FACTORVAL, D_LLQUANT_FACTORMATCH, + DTRACE_LLQUANTIZE_FACTORSHIFT }, + { "low magnitude", D_LLQUANT_LOWTYPE, + D_LLQUANT_LOWVAL, D_LLQUANT_LOWMATCH, + DTRACE_LLQUANTIZE_LOWSHIFT }, + { "high magnitude", D_LLQUANT_HIGHTYPE, + D_LLQUANT_HIGHVAL, D_LLQUANT_HIGHMATCH, + DTRACE_LLQUANTIZE_HIGHSHIFT }, + { "linear steps per magnitude", D_LLQUANT_NSTEPTYPE, + D_LLQUANT_NSTEPVAL, D_LLQUANT_NSTEPMATCH, + DTRACE_LLQUANTIZE_NSTEPSHIFT }, + { NULL } + }; + + assert(arg == 0); + + for (i = 0; args[i].str != NULL; i++) { + if (llarg->dn_kind != DT_NODE_INT) { + dnerror(llarg, args[i].badtype, "llquantize( ) " + "argument #%d (%s) must be an " + "integer constant\n", i + 1, args[i].str); + } + + if ((uint64_t)llarg->dn_value > UINT16_MAX) { + dnerror(llarg, args[i].badval, "llquantize( ) " + "argument #%d (%s) must be an unsigned " + "16-bit quantity\n", i + 1, args[i].str); + } + + args[i].value = (uint16_t)llarg->dn_value; + + assert(!(arg & ((uint64_t)UINT16_MAX << + args[i].shift))); + arg |= ((uint64_t)args[i].value << args[i].shift); + llarg = llarg->dn_list; + } + + assert(arg != 0); + + if (args[0].value < 2) { + dnerror(dnp, D_LLQUANT_FACTORSMALL, "llquantize( ) " + "factor (argument #1) must be 
two or more\n"); + } + + if (args[1].value >= args[2].value) { + dnerror(dnp, D_LLQUANT_MAGRANGE, "llquantize( ) " + "high magnitude (argument #3) must be greater " + "than low magnitude (argument #2)\n"); + } + + if (args[3].value < args[0].value) { + dnerror(dnp, D_LLQUANT_FACTORNSTEPS, "llquantize( ) " + "factor (argument #1) must be less than or " + "equal to the number of linear steps per " + "magnitude (argument #4)\n"); + } + + for (v = args[0].value; v < args[3].value; v *= args[0].value) + continue; + + if ((args[3].value % args[0].value) || (v % args[3].value)) { + dnerror(dnp, D_LLQUANT_FACTOREVEN, "llquantize( ) " + "factor (argument #1) must evenly divide the " + "number of steps per magnitude (argument #4), " + "and the number of steps per magnitude must evenly " + "divide a power of the factor\n"); + } + + for (i = 0, order = 1; i < args[2].value; i++) { + if (order * args[0].value > order) { + order *= args[0].value; + continue; + } + + dnerror(dnp, D_LLQUANT_MAGTOOBIG, "llquantize( ) " + "factor (%d) raised to power of high magnitude " + "(%d) overflows 64-bits\n", args[0].value, + args[2].value); + } + + isp = (dt_idsig_t *)aid->di_data; + + if (isp->dis_auxinfo == 0) { + /* + * This is the first time we've seen an llquantize() + * for this aggregation; we'll store our argument + * as the auxiliary signature information. + */ + isp->dis_auxinfo = arg; + } else if ((oarg = isp->dis_auxinfo) != arg) { + /* + * If we have seen this llquantize() before and the + * argument doesn't match the original argument, pick + * the original argument apart to concisely report the + * mismatch. 
+ */ + int expected = 0, found = 0; + + for (i = 0; expected == found; i++) { + assert(args[i].str != NULL); + + expected = (oarg >> args[i].shift) & UINT16_MAX; + found = (arg >> args[i].shift) & UINT16_MAX; + } + + dnerror(dnp, args[i - 1].mismatch, "llquantize( ) " + "%s (argument #%d) doesn't match previous " + "declaration: expected %d, found %d\n", + args[i - 1].str, i, expected, found); + } + + incr = llarg; + argmax = 6; + } + + if (fid->di_id == DTRACEAGG_QUANTIZE) { + incr = dnp->dn_aggfun->dn_args->dn_list; + argmax = 2; + } + + if (incr != NULL) { + if (!dt_node_is_scalar(incr)) { + dnerror(dnp, D_PROTO_ARG, "%s( ) increment value " + "(argument #%d) must be of scalar type\n", + fid->di_name, argmax); + } + + if ((anp = incr->dn_list) != NULL) { + int argc = argmax; + + for (; anp != NULL; anp = anp->dn_list) + argc++; + + dnerror(incr, D_PROTO_LEN, "%s( ) prototype " + "mismatch: %d args passed, at most %d expected", + fid->di_name, argc, argmax); + } + + ap = dt_stmt_action(dtp, sdp); + n++; + + dt_cg(yypcb, incr); + ap->dtad_difo = dt_as(yypcb); + ap->dtad_difo->dtdo_rtype = dt_void_rtype; + ap->dtad_kind = DTRACEACT_DIFEXPR; + } + + assert(sdp->dtsd_aggdata == NULL); + sdp->dtsd_aggdata = aid; + + ap = dt_stmt_action(dtp, sdp); + assert(fid->di_kind == DT_IDENT_AGGFUNC); + assert(DTRACEACT_ISAGG(fid->di_id)); + ap->dtad_kind = fid->di_id; + ap->dtad_ntuple = n; + ap->dtad_arg = arg; + + if (dnp->dn_aggfun->dn_args != NULL) { + dt_cg(yypcb, dnp->dn_aggfun->dn_args); + ap->dtad_difo = dt_as(yypcb); + } +} + +static void +dt_compile_one_clause(dtrace_hdl_t *dtp, dt_node_t *cnp, dt_node_t *pnp) +{ + dtrace_ecbdesc_t *edp; + dtrace_stmtdesc_t *sdp; + dt_node_t *dnp; + + yylineno = pnp->dn_line; + dt_setcontext(dtp, pnp->dn_desc); + (void) dt_node_cook(cnp, DT_IDFLG_REF); + + if (DT_TREEDUMP_PASS(dtp, 2)) + dt_node_printr(cnp, stderr, 0); + + if ((edp = dt_ecbdesc_create(dtp, pnp->dn_desc)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + 
assert(yypcb->pcb_ecbdesc == NULL); + yypcb->pcb_ecbdesc = edp; + + if (cnp->dn_pred != NULL) { + dt_cg(yypcb, cnp->dn_pred); + edp->dted_pred.dtpdd_difo = dt_as(yypcb); + } + + if (cnp->dn_acts == NULL) { + dt_stmt_append(dt_stmt_create(dtp, edp, + cnp->dn_ctxattr, _dtrace_defattr), cnp); + } + + for (dnp = cnp->dn_acts; dnp != NULL; dnp = dnp->dn_list) { + assert(yypcb->pcb_stmt == NULL); + sdp = dt_stmt_create(dtp, edp, cnp->dn_ctxattr, cnp->dn_attr); + + switch (dnp->dn_kind) { + case DT_NODE_DEXPR: + if (dnp->dn_expr->dn_kind == DT_NODE_AGG) + dt_compile_agg(dtp, dnp->dn_expr, sdp); + else + dt_compile_exp(dtp, dnp, sdp); + break; + case DT_NODE_DFUNC: + dt_compile_fun(dtp, dnp, sdp); + break; + case DT_NODE_AGG: + dt_compile_agg(dtp, dnp, sdp); + break; + default: + dnerror(dnp, D_UNKNOWN, "internal error -- node kind " + "%u is not a valid statement\n", dnp->dn_kind); + } + + assert(yypcb->pcb_stmt == sdp); + dt_stmt_append(sdp, dnp); + } + + assert(yypcb->pcb_ecbdesc == edp); + dt_ecbdesc_release(dtp, edp); + dt_endcontext(dtp); + yypcb->pcb_ecbdesc = NULL; +} + +static void +dt_compile_clause(dtrace_hdl_t *dtp, dt_node_t *cnp) +{ + dt_node_t *pnp; + + for (pnp = cnp->dn_pdescs; pnp != NULL; pnp = pnp->dn_list) + dt_compile_one_clause(dtp, cnp, pnp); +} + +static void +dt_compile_xlator(dt_node_t *dnp) +{ + dt_xlator_t *dxp = dnp->dn_xlator; + dt_node_t *mnp; + + for (mnp = dnp->dn_members; mnp != NULL; mnp = mnp->dn_list) { + assert(dxp->dx_membdif[mnp->dn_membid] == NULL); + dt_cg(yypcb, mnp); + dxp->dx_membdif[mnp->dn_membid] = dt_as(yypcb); + } +} + +void +dt_setcontext(dtrace_hdl_t *dtp, dtrace_probedesc_t *pdp) +{ + const dtrace_pattr_t *pap; + dt_probe_t *prp; + dt_provider_t *pvp; + dt_ident_t *idp; + char attrstr[8]; + int err; + + /* + * Both kernel and pid based providers are allowed to have names + * ending with what could be interpreted as a number. 
We assume it's + * a pid and that we may need to dynamically create probes for + * that process if: + * + * (1) The provider doesn't exist, or, + * (2) The provider exists and has DTRACE_PRIV_PROC privilege. + * + * On an error, dt_pid_create_probes() will set the error message + * and tag -- we just have to longjmp() out of here. + */ + if (isdigit(pdp->dtpd_provider[strlen(pdp->dtpd_provider) - 1]) && + ((pvp = dt_provider_lookup(dtp, pdp->dtpd_provider)) == NULL || + pvp->pv_desc.dtvd_priv.dtpp_flags & DTRACE_PRIV_PROC) && + dt_pid_create_probes(pdp, dtp, yypcb) != 0) { + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + } + + /* + * Call dt_probe_info() to get the probe arguments and attributes. If + * a representative probe is found, set 'pap' to the probe provider's + * attributes. Otherwise set 'pap' to default Unstable attributes. + */ + if ((prp = dt_probe_info(dtp, pdp, &yypcb->pcb_pinfo)) == NULL) { + pap = &_dtrace_prvdesc; + err = dtrace_errno(dtp); + bzero(&yypcb->pcb_pinfo, sizeof (dtrace_probeinfo_t)); + yypcb->pcb_pinfo.dtp_attr = pap->dtpa_provider; + yypcb->pcb_pinfo.dtp_arga = pap->dtpa_args; + } else { + pap = &prp->pr_pvp->pv_desc.dtvd_attr; + err = 0; + } + + if (err == EDT_NOPROBE && !(yypcb->pcb_cflags & DTRACE_C_ZDEFS)) { + xyerror(D_PDESC_ZERO, "probe description %s:%s:%s:%s does not " + "match any probes\n", pdp->dtpd_provider, pdp->dtpd_mod, + pdp->dtpd_func, pdp->dtpd_name); + } + + if (err != EDT_NOPROBE && err != EDT_UNSTABLE && err != 0) + xyerror(D_PDESC_INVAL, "%s\n", dtrace_errmsg(dtp, err)); + + dt_dprintf("set context to %s:%s:%s:%s [%u] prp=%p attr=%s argc=%d\n", + pdp->dtpd_provider, pdp->dtpd_mod, pdp->dtpd_func, pdp->dtpd_name, + pdp->dtpd_id, (void *)prp, dt_attr_str(yypcb->pcb_pinfo.dtp_attr, + attrstr, sizeof (attrstr)), yypcb->pcb_pinfo.dtp_argc); + + /* + * Reset the stability attributes of D global variables that vary + * based on the attributes of the provider and context itself. 
+ */ + if ((idp = dt_idhash_lookup(dtp->dt_globals, "probeprov")) != NULL) + idp->di_attr = pap->dtpa_provider; + if ((idp = dt_idhash_lookup(dtp->dt_globals, "probemod")) != NULL) + idp->di_attr = pap->dtpa_mod; + if ((idp = dt_idhash_lookup(dtp->dt_globals, "probefunc")) != NULL) + idp->di_attr = pap->dtpa_func; + if ((idp = dt_idhash_lookup(dtp->dt_globals, "probename")) != NULL) + idp->di_attr = pap->dtpa_name; + if ((idp = dt_idhash_lookup(dtp->dt_globals, "args")) != NULL) + idp->di_attr = pap->dtpa_args; + + yypcb->pcb_pdesc = pdp; + yypcb->pcb_probe = prp; +} + +/* + * Reset context-dependent variables and state at the end of cooking a D probe + * definition clause. This ensures that external declarations between clauses + * do not reference any stale context-dependent data from the previous clause. + */ +void +dt_endcontext(dtrace_hdl_t *dtp) +{ + static const char *const cvars[] = { + "probeprov", "probemod", "probefunc", "probename", "args", NULL + }; + + dt_ident_t *idp; + int i; + + for (i = 0; cvars[i] != NULL; i++) { + if ((idp = dt_idhash_lookup(dtp->dt_globals, cvars[i])) != NULL) + idp->di_attr = _dtrace_defattr; + } + + yypcb->pcb_pdesc = NULL; + yypcb->pcb_probe = NULL; +} + +static int +dt_reduceid(dt_idhash_t *dhp, dt_ident_t *idp, dtrace_hdl_t *dtp) +{ + if (idp->di_vers != 0 && idp->di_vers > dtp->dt_vmax) + dt_idhash_delete(dhp, idp); + + return (0); +} + +/* + * When dtrace_setopt() is called for "version", it calls dt_reduce() to remove + * any identifiers or translators that have been previously defined as bound to + * a version greater than the specified version. Therefore, in our current + * version implementation, establishing a binding is a one-way transformation. + * In addition, no versioning is currently provided for types as our .d library + * files do not define any types and we reserve prefixes DTRACE_ and dtrace_ + * for our exclusive use. If required, type versioning will require more work. 
+ */ +int +dt_reduce(dtrace_hdl_t *dtp, dt_version_t v) +{ + char s[DT_VERSION_STRMAX]; + dt_xlator_t *dxp, *nxp; + + if (v > dtp->dt_vmax) + return (dt_set_errno(dtp, EDT_VERSREDUCED)); + else if (v == dtp->dt_vmax) + return (0); /* no reduction necessary */ + + dt_dprintf("reducing api version to %s\n", + dt_version_num2str(v, s, sizeof (s))); + + dtp->dt_vmax = v; + + for (dxp = dt_list_next(&dtp->dt_xlators); dxp != NULL; dxp = nxp) { + nxp = dt_list_next(dxp); + if ((dxp->dx_souid.di_vers != 0 && dxp->dx_souid.di_vers > v) || + (dxp->dx_ptrid.di_vers != 0 && dxp->dx_ptrid.di_vers > v)) + dt_list_delete(&dtp->dt_xlators, dxp); + } + + (void) dt_idhash_iter(dtp->dt_macros, (dt_idhash_f *)dt_reduceid, dtp); + (void) dt_idhash_iter(dtp->dt_aggs, (dt_idhash_f *)dt_reduceid, dtp); + (void) dt_idhash_iter(dtp->dt_globals, (dt_idhash_f *)dt_reduceid, dtp); + (void) dt_idhash_iter(dtp->dt_tls, (dt_idhash_f *)dt_reduceid, dtp); + + return (0); +} + +/* + * Fork and exec the cpp(1) preprocessor to run over the specified input file, + * and return a FILE handle for the cpp output. We use the /dev/fd filesystem + * here to simplify the code by leveraging file descriptor inheritance. + */ +static FILE * +dt_preproc(dtrace_hdl_t *dtp, FILE *ifp) +{ + int argc = dtp->dt_cpp_argc; + char **argv = malloc(sizeof (char *) * (argc + 5)); + FILE *ofp = tmpfile(); + +#if defined(sun) + char ipath[20], opath[20]; /* big enough for /dev/fd/ + INT_MAX + \0 */ +#endif + char verdef[32]; /* big enough for -D__SUNW_D_VERSION=0x%08x + \0 */ + + struct sigaction act, oact; + sigset_t mask, omask; + + int wstat, estat; + pid_t pid; +#if defined(sun) + off64_t off; +#else + off_t off = 0; +#endif + int c; + + if (argv == NULL || ofp == NULL) { + (void) dt_set_errno(dtp, errno); + goto err; + } + + /* + * If the input is a seekable file, see if it is an interpreter file. + * If we see #!, seek past the first line because cpp will choke on it. 
+ * We start cpp just prior to the \n at the end of this line so that + * it still sees the newline, ensuring that #line values are correct. + */ + if (isatty(fileno(ifp)) == 0 && (off = ftello64(ifp)) != -1) { + if ((c = fgetc(ifp)) == '#' && (c = fgetc(ifp)) == '!') { + for (off += 2; c != '\n'; off++) { + if ((c = fgetc(ifp)) == EOF) + break; + } + if (c == '\n') + off--; /* start cpp just prior to \n */ + } + (void) fflush(ifp); + (void) fseeko64(ifp, off, SEEK_SET); + } + +#if defined(sun) + (void) snprintf(ipath, sizeof (ipath), "/dev/fd/%d", fileno(ifp)); + (void) snprintf(opath, sizeof (opath), "/dev/fd/%d", fileno(ofp)); +#endif + + bcopy(dtp->dt_cpp_argv, argv, sizeof (char *) * argc); + + (void) snprintf(verdef, sizeof (verdef), + "-D__SUNW_D_VERSION=0x%08x", dtp->dt_vmax); + argv[argc++] = verdef; + +#if defined(sun) + switch (dtp->dt_stdcmode) { + case DT_STDC_XA: + case DT_STDC_XT: + argv[argc++] = "-D__STDC__=0"; + break; + case DT_STDC_XC: + argv[argc++] = "-D__STDC__=1"; + break; + } + + argv[argc++] = ipath; + argv[argc++] = opath; +#else + argv[argc++] = "-P"; +#endif + argv[argc] = NULL; + + /* + * libdtrace must be able to be embedded in other programs that may + * include application-specific signal handlers. Therefore, if we + * need to fork to run cpp(1), we must avoid generating a SIGCHLD + * that could confuse the containing application. To do this, + * we block SIGCHLD and reset its disposition to SIG_DFL. + * We restore our signal state once we are done. 
+ */ + (void) sigemptyset(&mask); + (void) sigaddset(&mask, SIGCHLD); + (void) sigprocmask(SIG_BLOCK, &mask, &omask); + + bzero(&act, sizeof (act)); + act.sa_handler = SIG_DFL; + (void) sigaction(SIGCHLD, &act, &oact); + + if ((pid = fork1()) == -1) { + (void) sigaction(SIGCHLD, &oact, NULL); + (void) sigprocmask(SIG_SETMASK, &omask, NULL); + (void) dt_set_errno(dtp, EDT_CPPFORK); + goto err; + } + + if (pid == 0) { +#if !defined(sun) + if (isatty(fileno(ifp)) == 0) + lseek(fileno(ifp), off, SEEK_SET); + dup2(fileno(ifp), 0); + dup2(fileno(ofp), 1); +#endif + (void) execvp(dtp->dt_cpp_path, argv); + _exit(errno == ENOENT ? 127 : 126); + } + + do { + dt_dprintf("waiting for %s (PID %d)\n", dtp->dt_cpp_path, + (int)pid); + } while (waitpid(pid, &wstat, 0) == -1 && errno == EINTR); + + (void) sigaction(SIGCHLD, &oact, NULL); + (void) sigprocmask(SIG_SETMASK, &omask, NULL); + + dt_dprintf("%s returned exit status 0x%x\n", dtp->dt_cpp_path, wstat); + estat = WIFEXITED(wstat) ? WEXITSTATUS(wstat) : -1; + + if (estat != 0) { + switch (estat) { + case 126: + (void) dt_set_errno(dtp, EDT_CPPEXEC); + break; + case 127: + (void) dt_set_errno(dtp, EDT_CPPENT); + break; + default: + (void) dt_set_errno(dtp, EDT_CPPERR); + } + goto err; + } + + free(argv); + (void) fflush(ofp); + (void) fseek(ofp, 0, SEEK_SET); + return (ofp); + +err: + free(argv); + (void) fclose(ofp); + return (NULL); +} + +static void +dt_lib_depend_error(dtrace_hdl_t *dtp, const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap); + va_end(ap); +} + +int +dt_lib_depend_add(dtrace_hdl_t *dtp, dt_list_t *dlp, const char *arg) +{ + dt_lib_depend_t *dld; + const char *end; + + assert(arg != NULL); + + if ((end = strrchr(arg, '/')) == NULL) + return (dt_set_errno(dtp, EINVAL)); + + if ((dld = dt_zalloc(dtp, sizeof (dt_lib_depend_t))) == NULL) + return (-1); + + if ((dld->dtld_libpath = dt_alloc(dtp, MAXPATHLEN)) == NULL) { + dt_free(dtp, dld); + return (-1); + } + + (void) strlcpy(dld->dtld_libpath, arg, end - arg + 2); + if ((dld->dtld_library = strdup(arg)) == NULL) { + dt_free(dtp, dld->dtld_libpath); + dt_free(dtp, dld); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + dt_list_append(dlp, dld); + return (0); +} + +dt_lib_depend_t * +dt_lib_depend_lookup(dt_list_t *dld, const char *arg) +{ + dt_lib_depend_t *dldn; + + for (dldn = dt_list_next(dld); dldn != NULL; + dldn = dt_list_next(dldn)) { + if (strcmp(dldn->dtld_library, arg) == 0) + return (dldn); + } + + return (NULL); +} + +/* + * Go through all the library files, and, if any library dependencies exist for + * that file, add it to that node's list of dependents. The result of this + * will be a graph which can then be topologically sorted to produce a + * compilation order. 
+ */ +static int +dt_lib_build_graph(dtrace_hdl_t *dtp) +{ + dt_lib_depend_t *dld, *dpld; + + for (dld = dt_list_next(&dtp->dt_lib_dep); dld != NULL; + dld = dt_list_next(dld)) { + char *library = dld->dtld_library; + + for (dpld = dt_list_next(&dld->dtld_dependencies); dpld != NULL; + dpld = dt_list_next(dpld)) { + dt_lib_depend_t *dlda; + + if ((dlda = dt_lib_depend_lookup(&dtp->dt_lib_dep, + dpld->dtld_library)) == NULL) { + dt_lib_depend_error(dtp, + "Invalid library dependency in %s: %s\n", + dld->dtld_library, dpld->dtld_library); + + return (dt_set_errno(dtp, EDT_COMPILER)); + } + + if ((dt_lib_depend_add(dtp, &dlda->dtld_dependents, + library)) != 0) { + return (-1); /* preserve dt_errno */ + } + } + } + return (0); +} + +static int +dt_topo_sort(dtrace_hdl_t *dtp, dt_lib_depend_t *dld, int *count) +{ + dt_lib_depend_t *dpld, *dlda, *new; + + dld->dtld_start = ++(*count); + + for (dpld = dt_list_next(&dld->dtld_dependents); dpld != NULL; + dpld = dt_list_next(dpld)) { + dlda = dt_lib_depend_lookup(&dtp->dt_lib_dep, + dpld->dtld_library); + assert(dlda != NULL); + + if (dlda->dtld_start == 0 && + dt_topo_sort(dtp, dlda, count) == -1) + return (-1); + } + + if ((new = dt_zalloc(dtp, sizeof (dt_lib_depend_t))) == NULL) + return (-1); + + if ((new->dtld_library = strdup(dld->dtld_library)) == NULL) { + dt_free(dtp, new); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + new->dtld_start = dld->dtld_start; + new->dtld_finish = dld->dtld_finish = ++(*count); + dt_list_prepend(&dtp->dt_lib_dep_sorted, new); + + dt_dprintf("library %s sorted (%d/%d)\n", new->dtld_library, + new->dtld_start, new->dtld_finish); + + return (0); +} + +static int +dt_lib_depend_sort(dtrace_hdl_t *dtp) +{ + dt_lib_depend_t *dld, *dpld, *dlda; + int count = 0; + + if (dt_lib_build_graph(dtp) == -1) + return (-1); /* preserve dt_errno */ + + /* + * Perform a topological sort of the graph that hangs off + * dtp->dt_lib_dep. 
The result of this process will be a + * dependency ordered list located at dtp->dt_lib_dep_sorted. + */ + for (dld = dt_list_next(&dtp->dt_lib_dep); dld != NULL; + dld = dt_list_next(dld)) { + if (dld->dtld_start == 0 && + dt_topo_sort(dtp, dld, &count) == -1) + return (-1); /* preserve dt_errno */; + } + + /* + * Check the graph for cycles. If an ancestor's finishing time is + * less than any of its dependent's finishing times then a back edge + * exists in the graph and this is a cycle. + */ + for (dld = dt_list_next(&dtp->dt_lib_dep); dld != NULL; + dld = dt_list_next(dld)) { + for (dpld = dt_list_next(&dld->dtld_dependents); dpld != NULL; + dpld = dt_list_next(dpld)) { + dlda = dt_lib_depend_lookup(&dtp->dt_lib_dep_sorted, + dpld->dtld_library); + assert(dlda != NULL); + + if (dlda->dtld_finish > dld->dtld_finish) { + dt_lib_depend_error(dtp, + "Cyclic dependency detected: %s => %s\n", + dld->dtld_library, dpld->dtld_library); + + return (dt_set_errno(dtp, EDT_COMPILER)); + } + } + } + + return (0); +} + +static void +dt_lib_depend_free(dtrace_hdl_t *dtp) +{ + dt_lib_depend_t *dld, *dlda; + + while ((dld = dt_list_next(&dtp->dt_lib_dep)) != NULL) { + while ((dlda = dt_list_next(&dld->dtld_dependencies)) != NULL) { + dt_list_delete(&dld->dtld_dependencies, dlda); + dt_free(dtp, dlda->dtld_library); + dt_free(dtp, dlda->dtld_libpath); + dt_free(dtp, dlda); + } + while ((dlda = dt_list_next(&dld->dtld_dependents)) != NULL) { + dt_list_delete(&dld->dtld_dependents, dlda); + dt_free(dtp, dlda->dtld_library); + dt_free(dtp, dlda->dtld_libpath); + dt_free(dtp, dlda); + } + dt_list_delete(&dtp->dt_lib_dep, dld); + dt_free(dtp, dld->dtld_library); + dt_free(dtp, dld->dtld_libpath); + dt_free(dtp, dld); + } + + while ((dld = dt_list_next(&dtp->dt_lib_dep_sorted)) != NULL) { + dt_list_delete(&dtp->dt_lib_dep_sorted, dld); + dt_free(dtp, dld->dtld_library); + dt_free(dtp, dld); + } +} + +/* + * Open all the .d library files found in the specified directory and + * compile 
each one of them. We silently ignore any missing directories and + * other files found therein. We only fail (and thereby fail dt_load_libs()) if + * we fail to compile a library and the error is something other than #pragma D + * depends_on. Dependency errors are silently ignored to permit a library + * directory to contain libraries which may not be accessible depending on our + * privileges. + */ +static int +dt_load_libs_dir(dtrace_hdl_t *dtp, const char *path) +{ + struct dirent *dp; + const char *p, *end; + DIR *dirp; + + char fname[PATH_MAX]; + FILE *fp; + void *rv; + dt_lib_depend_t *dld; + + if ((dirp = opendir(path)) == NULL) { + dt_dprintf("skipping lib dir %s: %s\n", path, strerror(errno)); + return (0); + } + + /* First, parse each file for library dependencies. */ + while ((dp = readdir(dirp)) != NULL) { + if ((p = strrchr(dp->d_name, '.')) == NULL || strcmp(p, ".d")) + continue; /* skip any filename not ending in .d */ + + (void) snprintf(fname, sizeof (fname), + "%s/%s", path, dp->d_name); + + if ((fp = fopen(fname, "r")) == NULL) { + dt_dprintf("skipping library %s: %s\n", + fname, strerror(errno)); + continue; + } + + /* + * Skip files whose name match an already processed library + */ + for (dld = dt_list_next(&dtp->dt_lib_dep); dld != NULL; + dld = dt_list_next(dld)) { + end = strrchr(dld->dtld_library, '/'); + /* dt_lib_depend_add ensures this */ + assert(end != NULL); + if (strcmp(end + 1, dp->d_name) == 0) + break; + } + + if (dld != NULL) { + dt_dprintf("skipping library %s, already processed " + "library with the same name: %s", dp->d_name, + dld->dtld_library); + (void) fclose(fp); + continue; + } + + dtp->dt_filetag = fname; + if (dt_lib_depend_add(dtp, &dtp->dt_lib_dep, fname) != 0) { + (void) fclose(fp); + return (-1); /* preserve dt_errno */ + } + + rv = dt_compile(dtp, DT_CTX_DPROG, + DTRACE_PROBESPEC_NAME, NULL, + DTRACE_C_EMPTY | DTRACE_C_CTL, 0, NULL, fp, NULL); + + if (rv != NULL && dtp->dt_errno && + (dtp->dt_errno != 
EDT_COMPILER || + dtp->dt_errtag != dt_errtag(D_PRAGMA_DEPEND))) { + (void) fclose(fp); + return (-1); /* preserve dt_errno */ + } + + if (dtp->dt_errno) + dt_dprintf("error parsing library %s: %s\n", + fname, dtrace_errmsg(dtp, dtrace_errno(dtp))); + + (void) fclose(fp); + dtp->dt_filetag = NULL; + } + + (void) closedir(dirp); + + return (0); +} + +/* + * Perform a topological sorting of all the libraries found across the entire + * dt_lib_path. Once sorted, compile each one in topological order to cache its + * inlines and translators, etc. We silently ignore any missing directories and + * other files found therein. We only fail (and thereby fail dt_load_libs()) if + * we fail to compile a library and the error is something other than #pragma D + * depends_on. Dependency errors are silently ignored to permit a library + * directory to contain libraries which may not be accessible depending on our + * privileges. + */ +static int +dt_load_libs_sort(dtrace_hdl_t *dtp) +{ + dtrace_prog_t *pgp; + FILE *fp; + dt_lib_depend_t *dld; + + /* + * Finish building the graph containing the library dependencies + * and perform a topological sort to generate an ordered list + * for compilation. 
+ */ + if (dt_lib_depend_sort(dtp) == -1) + goto err; + + for (dld = dt_list_next(&dtp->dt_lib_dep_sorted); dld != NULL; + dld = dt_list_next(dld)) { + + if ((fp = fopen(dld->dtld_library, "r")) == NULL) { + dt_dprintf("skipping library %s: %s\n", + dld->dtld_library, strerror(errno)); + continue; + } + + dtp->dt_filetag = dld->dtld_library; + pgp = dtrace_program_fcompile(dtp, fp, DTRACE_C_EMPTY, 0, NULL); + (void) fclose(fp); + dtp->dt_filetag = NULL; + + if (pgp == NULL && (dtp->dt_errno != EDT_COMPILER || + dtp->dt_errtag != dt_errtag(D_PRAGMA_DEPEND))) + goto err; + + if (pgp == NULL) { + dt_dprintf("skipping library %s: %s\n", + dld->dtld_library, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + } else { + dld->dtld_loaded = B_TRUE; + dt_program_destroy(dtp, pgp); + } + } + + dt_lib_depend_free(dtp); + return (0); + +err: + dt_lib_depend_free(dtp); + return (-1); /* preserve dt_errno */ +} + +/* + * Load the contents of any appropriate DTrace .d library files. These files + * contain inlines and translators that will be cached by the compiler. We + * defer this activity until the first compile to permit libdtrace clients to + * add their own library directories and so that we can properly report errors. + */ +static int +dt_load_libs(dtrace_hdl_t *dtp) +{ + dt_dirpath_t *dirp; + + if (dtp->dt_cflags & DTRACE_C_NOLIBS) + return (0); /* libraries already processed */ + + dtp->dt_cflags |= DTRACE_C_NOLIBS; + + /* + * /usr/lib/dtrace is always at the head of the list. The rest of the + * list is specified in the precedence order the user requested. Process + * everything other than the head first. DTRACE_C_NOLIBS has already + * been spcified so dt_vopen will ensure that there is always one entry + * in dt_lib_path. 
+ */ + for (dirp = dt_list_next(dt_list_next(&dtp->dt_lib_path)); + dirp != NULL; dirp = dt_list_next(dirp)) { + if (dt_load_libs_dir(dtp, dirp->dir_path) != 0) { + dtp->dt_cflags &= ~DTRACE_C_NOLIBS; + return (-1); /* errno is set for us */ + } + } + + /* Handle /usr/lib/dtrace */ + dirp = dt_list_next(&dtp->dt_lib_path); + if (dt_load_libs_dir(dtp, dirp->dir_path) != 0) { + dtp->dt_cflags &= ~DTRACE_C_NOLIBS; + return (-1); /* errno is set for us */ + } + + if (dt_load_libs_sort(dtp) < 0) + return (-1); /* errno is set for us */ + + return (0); +} + +static void * +dt_compile(dtrace_hdl_t *dtp, int context, dtrace_probespec_t pspec, void *arg, + uint_t cflags, int argc, char *const argv[], FILE *fp, const char *s) +{ + dt_node_t *dnp; + dt_decl_t *ddp; + dt_pcb_t pcb; + void *rv; + int err; + + if ((fp == NULL && s == NULL) || (cflags & ~DTRACE_C_MASK) != 0) { + (void) dt_set_errno(dtp, EINVAL); + return (NULL); + } + + if (dt_list_next(&dtp->dt_lib_path) != NULL && dt_load_libs(dtp) != 0) + return (NULL); /* errno is set for us */ + + if (dtp->dt_globals->dh_nelems != 0) + (void) dt_idhash_iter(dtp->dt_globals, dt_idreset, NULL); + + if (dtp->dt_tls->dh_nelems != 0) + (void) dt_idhash_iter(dtp->dt_tls, dt_idreset, NULL); + + if (fp && (cflags & DTRACE_C_CPP) && (fp = dt_preproc(dtp, fp)) == NULL) + return (NULL); /* errno is set for us */ + + dt_pcb_push(dtp, &pcb); + + pcb.pcb_fileptr = fp; + pcb.pcb_string = s; + pcb.pcb_strptr = s; + pcb.pcb_strlen = s ? strlen(s) : 0; + pcb.pcb_sargc = argc; + pcb.pcb_sargv = argv; + pcb.pcb_sflagv = argc ? 
calloc(argc, sizeof (ushort_t)) : NULL; + pcb.pcb_pspec = pspec; + pcb.pcb_cflags = dtp->dt_cflags | cflags; + pcb.pcb_amin = dtp->dt_amin; + pcb.pcb_yystate = -1; + pcb.pcb_context = context; + pcb.pcb_token = context; + + if (context != DT_CTX_DPROG) + yybegin(YYS_EXPR); + else if (cflags & DTRACE_C_CTL) + yybegin(YYS_CONTROL); + else + yybegin(YYS_CLAUSE); + + if ((err = setjmp(yypcb->pcb_jmpbuf)) != 0) + goto out; + + if (yypcb->pcb_sargc != 0 && yypcb->pcb_sflagv == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + yypcb->pcb_idents = dt_idhash_create("ambiguous", NULL, 0, 0); + yypcb->pcb_locals = dt_idhash_create("clause local", NULL, + DIF_VAR_OTHER_UBASE, DIF_VAR_OTHER_MAX); + + if (yypcb->pcb_idents == NULL || yypcb->pcb_locals == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * Invoke the parser to evaluate the D source code. If any errors + * occur during parsing, an error function will be called and we + * will longjmp back to pcb_jmpbuf to abort. If parsing succeeds, + * we optionally display the parse tree if debugging is enabled. + */ + if (yyparse() != 0 || yypcb->pcb_root == NULL) + xyerror(D_EMPTY, "empty D program translation unit\n"); + + yybegin(YYS_DONE); + + if (cflags & DTRACE_C_CTL) + goto out; + + if (context != DT_CTX_DTYPE && DT_TREEDUMP_PASS(dtp, 1)) + dt_node_printr(yypcb->pcb_root, stderr, 0); + + if (yypcb->pcb_pragmas != NULL) + (void) dt_idhash_iter(yypcb->pcb_pragmas, dt_idpragma, NULL); + + if (argc > 1 && !(yypcb->pcb_cflags & DTRACE_C_ARGREF) && + !(yypcb->pcb_sflagv[argc - 1] & DT_IDFLG_REF)) { + xyerror(D_MACRO_UNUSED, "extraneous argument '%s' ($%d is " + "not referenced)\n", yypcb->pcb_sargv[argc - 1], argc - 1); + } + + /* + * If we have successfully created a parse tree for a D program, loop + * over the clauses and actions and instantiate the corresponding + * libdtrace program. If we are parsing a D expression, then we + * simply run the code generator and assembler on the resulting tree. 
+ */ + switch (context) { + case DT_CTX_DPROG: + assert(yypcb->pcb_root->dn_kind == DT_NODE_PROG); + + if ((dnp = yypcb->pcb_root->dn_list) == NULL && + !(yypcb->pcb_cflags & DTRACE_C_EMPTY)) + xyerror(D_EMPTY, "empty D program translation unit\n"); + + if ((yypcb->pcb_prog = dt_program_create(dtp)) == NULL) + longjmp(yypcb->pcb_jmpbuf, dtrace_errno(dtp)); + + for (; dnp != NULL; dnp = dnp->dn_list) { + switch (dnp->dn_kind) { + case DT_NODE_CLAUSE: + dt_compile_clause(dtp, dnp); + break; + case DT_NODE_XLATOR: + if (dtp->dt_xlatemode == DT_XL_DYNAMIC) + dt_compile_xlator(dnp); + break; + case DT_NODE_PROVIDER: + (void) dt_node_cook(dnp, DT_IDFLG_REF); + break; + } + } + + yypcb->pcb_prog->dp_xrefs = yypcb->pcb_asxrefs; + yypcb->pcb_prog->dp_xrefslen = yypcb->pcb_asxreflen; + yypcb->pcb_asxrefs = NULL; + yypcb->pcb_asxreflen = 0; + + rv = yypcb->pcb_prog; + break; + + case DT_CTX_DEXPR: + (void) dt_node_cook(yypcb->pcb_root, DT_IDFLG_REF); + dt_cg(yypcb, yypcb->pcb_root); + rv = dt_as(yypcb); + break; + + case DT_CTX_DTYPE: + ddp = (dt_decl_t *)yypcb->pcb_root; /* root is really a decl */ + err = dt_decl_type(ddp, arg); + dt_decl_free(ddp); + + if (err != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + + rv = NULL; + break; + } + +out: + if (context != DT_CTX_DTYPE && yypcb->pcb_root != NULL && + DT_TREEDUMP_PASS(dtp, 3)) + dt_node_printr(yypcb->pcb_root, stderr, 0); + + if (dtp->dt_cdefs_fd != -1 && (ftruncate64(dtp->dt_cdefs_fd, 0) == -1 || + lseek64(dtp->dt_cdefs_fd, 0, SEEK_SET) == -1 || + ctf_write(dtp->dt_cdefs->dm_ctfp, dtp->dt_cdefs_fd) == CTF_ERR)) + dt_dprintf("failed to update CTF cache: %s\n", strerror(errno)); + + if (dtp->dt_ddefs_fd != -1 && (ftruncate64(dtp->dt_ddefs_fd, 0) == -1 || + lseek64(dtp->dt_ddefs_fd, 0, SEEK_SET) == -1 || + ctf_write(dtp->dt_ddefs->dm_ctfp, dtp->dt_ddefs_fd) == CTF_ERR)) + dt_dprintf("failed to update CTF cache: %s\n", strerror(errno)); + + if (yypcb->pcb_fileptr && (cflags & DTRACE_C_CPP)) + (void) 
fclose(yypcb->pcb_fileptr); /* close dt_preproc() file */ + + dt_pcb_pop(dtp, err); + (void) dt_set_errno(dtp, err); + return (err ? NULL : rv); +} + +dtrace_prog_t * +dtrace_program_strcompile(dtrace_hdl_t *dtp, const char *s, + dtrace_probespec_t spec, uint_t cflags, int argc, char *const argv[]) +{ + return (dt_compile(dtp, DT_CTX_DPROG, + spec, NULL, cflags, argc, argv, NULL, s)); +} + +dtrace_prog_t * +dtrace_program_fcompile(dtrace_hdl_t *dtp, FILE *fp, + uint_t cflags, int argc, char *const argv[]) +{ + return (dt_compile(dtp, DT_CTX_DPROG, + DTRACE_PROBESPEC_NAME, NULL, cflags, argc, argv, fp, NULL)); +} + +int +dtrace_type_strcompile(dtrace_hdl_t *dtp, const char *s, dtrace_typeinfo_t *dtt) +{ + (void) dt_compile(dtp, DT_CTX_DTYPE, + DTRACE_PROBESPEC_NONE, dtt, 0, 0, NULL, NULL, s); + return (dtp->dt_errno ? -1 : 0); +} + +int +dtrace_type_fcompile(dtrace_hdl_t *dtp, FILE *fp, dtrace_typeinfo_t *dtt) +{ + (void) dt_compile(dtp, DT_CTX_DTYPE, + DTRACE_PROBESPEC_NONE, dtt, 0, 0, NULL, fp, NULL); + return (dtp->dt_errno ? -1 : 0); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_cg.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_cg.c new file mode 100644 index 0000000..e748ff2 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_cg.c @@ -0,0 +1,2185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/isa_defs.h> + +#include <strings.h> +#include <stdlib.h> +#include <setjmp.h> +#include <assert.h> +#include <errno.h> + +#include <dt_impl.h> +#include <dt_grammar.h> +#include <dt_parser.h> +#include <dt_provider.h> + +static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *); + +static dt_irnode_t * +dt_cg_node_alloc(uint_t label, dif_instr_t instr) +{ + dt_irnode_t *dip = malloc(sizeof (dt_irnode_t)); + + if (dip == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dip->di_label = label; + dip->di_instr = instr; + dip->di_extern = NULL; + dip->di_next = NULL; + + return (dip); +} + +/* + * Code generator wrapper function for ctf_member_info. If we are given a + * reference to a forward declaration tag, search the entire type space for + * the actual definition and then call ctf_member_info on the result. 
+ */ +static ctf_file_t * +dt_cg_membinfo(ctf_file_t *fp, ctf_id_t type, const char *s, ctf_membinfo_t *mp) +{ + while (ctf_type_kind(fp, type) == CTF_K_FORWARD) { + char n[DT_TYPE_NAMELEN]; + dtrace_typeinfo_t dtt; + + if (ctf_type_name(fp, type, n, sizeof (n)) == NULL || + dt_type_lookup(n, &dtt) == -1 || ( + dtt.dtt_ctfp == fp && dtt.dtt_type == type)) + break; /* unable to improve our position */ + + fp = dtt.dtt_ctfp; + type = ctf_type_resolve(fp, dtt.dtt_type); + } + + if (ctf_member_info(fp, type, s, mp) == CTF_ERR) + return (NULL); /* ctf_errno is set for us */ + + return (fp); +} + +static void +dt_cg_xsetx(dt_irlist_t *dlp, dt_ident_t *idp, uint_t lbl, int reg, uint64_t x) +{ + int flag = idp != NULL ? DT_INT_PRIVATE : DT_INT_SHARED; + int intoff = dt_inttab_insert(yypcb->pcb_inttab, x, flag); + dif_instr_t instr = DIF_INSTR_SETX((uint_t)intoff, reg); + + if (intoff == -1) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (intoff > DIF_INTOFF_MAX) + longjmp(yypcb->pcb_jmpbuf, EDT_INT2BIG); + + dt_irlist_append(dlp, dt_cg_node_alloc(lbl, instr)); + + if (idp != NULL) + dlp->dl_last->di_extern = idp; +} + +static void +dt_cg_setx(dt_irlist_t *dlp, int reg, uint64_t x) +{ + dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, reg, x); +} + +/* + * When loading bit-fields, we want to convert a byte count in the range + * 1-8 to the closest power of 2 (e.g. 3->4, 5->8, etc). The clp2() function + * is a clever implementation from "Hacker's Delight" by Henry Warren, Jr. + */ +static size_t +clp2(size_t x) +{ + x--; + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + + return (x + 1); +} + +/* + * Lookup the correct load opcode to use for the specified node and CTF type. + * We determine the size and convert it to a 3-bit index. Our lookup table + * is constructed to use a 5-bit index, consisting of the 3-bit size 0-7, a + * bit for the sign, and a bit for userland address. 
For example, a 4-byte + * signed load from userland would be at the following table index: + * user=1 sign=1 size=4 => binary index 11011 = decimal index 27 + */ +static uint_t +dt_cg_load(dt_node_t *dnp, ctf_file_t *ctfp, ctf_id_t type) +{ + static const uint_t ops[] = { + DIF_OP_LDUB, DIF_OP_LDUH, 0, DIF_OP_LDUW, + 0, 0, 0, DIF_OP_LDX, + DIF_OP_LDSB, DIF_OP_LDSH, 0, DIF_OP_LDSW, + 0, 0, 0, DIF_OP_LDX, + DIF_OP_ULDUB, DIF_OP_ULDUH, 0, DIF_OP_ULDUW, + 0, 0, 0, DIF_OP_ULDX, + DIF_OP_ULDSB, DIF_OP_ULDSH, 0, DIF_OP_ULDSW, + 0, 0, 0, DIF_OP_ULDX, + }; + + ctf_encoding_t e; + ssize_t size; + + /* + * If we're loading a bit-field, the size of our load is found by + * rounding cte_bits up to a byte boundary and then finding the + * nearest power of two to this value (see clp2(), above). + */ + if ((dnp->dn_flags & DT_NF_BITFIELD) && + ctf_type_encoding(ctfp, type, &e) != CTF_ERR) + size = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY); + else + size = ctf_type_size(ctfp, type); + + if (size < 1 || size > 8 || (size & (size - 1)) != 0) { + xyerror(D_UNKNOWN, "internal error -- cg cannot load " + "size %ld when passed by value\n", (long)size); + } + + size--; /* convert size to 3-bit index */ + + if (dnp->dn_flags & DT_NF_SIGNED) + size |= 0x08; + if (dnp->dn_flags & DT_NF_USERLAND) + size |= 0x10; + + return (ops[size]); +} + +static void +dt_cg_ptrsize(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp, + uint_t op, int dreg) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + ctf_arinfo_t r; + dif_instr_t instr; + ctf_id_t type; + uint_t kind; + ssize_t size; + int sreg; + + type = ctf_type_resolve(ctfp, dnp->dn_type); + kind = ctf_type_kind(ctfp, type); + assert(kind == CTF_K_POINTER || kind == CTF_K_ARRAY); + + if (kind == CTF_K_ARRAY) { + if (ctf_array_info(ctfp, type, &r) != 0) { + yypcb->pcb_hdl->dt_ctferr = ctf_errno(ctfp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } + type = r.ctr_contents; + } else + type = ctf_type_reference(ctfp, type); + + if ((size = ctf_type_size(ctfp, type)) 
== 1) + return; /* multiply or divide by one can be omitted */ + + sreg = dt_regset_alloc(drp); + dt_cg_setx(dlp, sreg, size); + instr = DIF_INSTR_FMT(op, dreg, sreg, dreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, sreg); +} + +/* + * If the result of a "." or "->" operation is a bit-field, we use this routine + * to generate an epilogue to the load instruction that extracts the value. In + * the diagrams below the "ld??" is the load instruction that is generated to + * load the containing word that is generating prior to calling this function. + * + * Epilogue for unsigned fields: Epilogue for signed fields: + * + * ldu? [r1], r1 lds? [r1], r1 + * setx USHIFT, r2 setx 64 - SSHIFT, r2 + * srl r1, r2, r1 sll r1, r2, r1 + * setx (1 << bits) - 1, r2 setx 64 - bits, r2 + * and r1, r2, r1 sra r1, r2, r1 + * + * The *SHIFT constants above changes value depending on the endian-ness of our + * target architecture. Refer to the comments below for more details. + */ +static void +dt_cg_field_get(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp, + ctf_file_t *fp, const ctf_membinfo_t *mp) +{ + ctf_encoding_t e; + dif_instr_t instr; + uint64_t shift; + int r1, r2; + + if (ctf_type_encoding(fp, mp->ctm_type, &e) != 0 || e.cte_bits > 64) { + xyerror(D_UNKNOWN, "cg: bad field: off %lu type <%ld> " + "bits %u\n", mp->ctm_offset, mp->ctm_type, e.cte_bits); + } + + assert(dnp->dn_op == DT_TOK_PTR || dnp->dn_op == DT_TOK_DOT); + r1 = dnp->dn_left->dn_reg; + r2 = dt_regset_alloc(drp); + + /* + * On little-endian architectures, ctm_offset counts from the right so + * ctm_offset % NBBY itself is the amount we want to shift right to + * move the value bits to the little end of the register to mask them. + * On big-endian architectures, ctm_offset counts from the left so we + * must subtract (ctm_offset % NBBY + cte_bits) from the size in bits + * we used for the load. 
The size of our load in turn is found by + * rounding cte_bits up to a byte boundary and then finding the + * nearest power of two to this value (see clp2(), above). These + * properties are used to compute shift as USHIFT or SSHIFT, below. + */ + if (dnp->dn_flags & DT_NF_SIGNED) { +#if BYTE_ORDER == _BIG_ENDIAN + shift = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY) * NBBY - + mp->ctm_offset % NBBY; +#else + shift = mp->ctm_offset % NBBY + e.cte_bits; +#endif + dt_cg_setx(dlp, r2, 64 - shift); + instr = DIF_INSTR_FMT(DIF_OP_SLL, r1, r2, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, r2, 64 - e.cte_bits); + instr = DIF_INSTR_FMT(DIF_OP_SRA, r1, r2, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + } else { +#if BYTE_ORDER == _BIG_ENDIAN + shift = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY) * NBBY - + (mp->ctm_offset % NBBY + e.cte_bits); +#else + shift = mp->ctm_offset % NBBY; +#endif + dt_cg_setx(dlp, r2, shift); + instr = DIF_INSTR_FMT(DIF_OP_SRL, r1, r2, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, r2, (1ULL << e.cte_bits) - 1); + instr = DIF_INSTR_FMT(DIF_OP_AND, r1, r2, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + } + + dt_regset_free(drp, r2); +} + +/* + * If the destination of a store operation is a bit-field, we use this routine + * to generate a prologue to the store instruction that loads the surrounding + * bits, clears the destination field, and ORs in the new value of the field. + * In the diagram below the "st?" is the store instruction that is generated to + * store the containing word that is generating after calling this function. + * + * ld [dst->dn_reg], r1 + * setx ~(((1 << cte_bits) - 1) << (ctm_offset % NBBY)), r2 + * and r1, r2, r1 + * + * setx (1 << cte_bits) - 1, r2 + * and src->dn_reg, r2, r2 + * setx ctm_offset % NBBY, r3 + * sll r2, r3, r2 + * + * or r1, r2, r1 + * st? 
r1, [dst->dn_reg] + * + * This routine allocates a new register to hold the value to be stored and + * returns it. The caller is responsible for freeing this register later. + */ +static int +dt_cg_field_set(dt_node_t *src, dt_irlist_t *dlp, + dt_regset_t *drp, dt_node_t *dst) +{ + uint64_t cmask, fmask, shift; + dif_instr_t instr; + int r1, r2, r3; + + ctf_membinfo_t m; + ctf_encoding_t e; + ctf_file_t *fp, *ofp; + ctf_id_t type; + + assert(dst->dn_op == DT_TOK_PTR || dst->dn_op == DT_TOK_DOT); + assert(dst->dn_right->dn_kind == DT_NODE_IDENT); + + fp = dst->dn_left->dn_ctfp; + type = ctf_type_resolve(fp, dst->dn_left->dn_type); + + if (dst->dn_op == DT_TOK_PTR) { + type = ctf_type_reference(fp, type); + type = ctf_type_resolve(fp, type); + } + + if ((fp = dt_cg_membinfo(ofp = fp, type, + dst->dn_right->dn_string, &m)) == NULL) { + yypcb->pcb_hdl->dt_ctferr = ctf_errno(ofp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } + + if (ctf_type_encoding(fp, m.ctm_type, &e) != 0 || e.cte_bits > 64) { + xyerror(D_UNKNOWN, "cg: bad field: off %lu type <%ld> " + "bits %u\n", m.ctm_offset, m.ctm_type, e.cte_bits); + } + + r1 = dt_regset_alloc(drp); + r2 = dt_regset_alloc(drp); + r3 = dt_regset_alloc(drp); + + /* + * Compute shifts and masks. We need to compute "shift" as the amount + * we need to shift left to position our field in the containing word. + * Refer to the comments in dt_cg_field_get(), above, for more info. + * We then compute fmask as the mask that truncates the value in the + * input register to width cte_bits, and cmask as the mask used to + * pass through the containing bits and zero the field bits. 
+ */ +#if BYTE_ORDER == _BIG_ENDIAN + shift = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY) * NBBY - + (m.ctm_offset % NBBY + e.cte_bits); +#else + shift = m.ctm_offset % NBBY; +#endif + fmask = (1ULL << e.cte_bits) - 1; + cmask = ~(fmask << shift); + + instr = DIF_INSTR_LOAD( + dt_cg_load(dst, fp, m.ctm_type), dst->dn_reg, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, r2, cmask); + instr = DIF_INSTR_FMT(DIF_OP_AND, r1, r2, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, r2, fmask); + instr = DIF_INSTR_FMT(DIF_OP_AND, src->dn_reg, r2, r2); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, r3, shift); + instr = DIF_INSTR_FMT(DIF_OP_SLL, r2, r3, r2); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_FMT(DIF_OP_OR, r1, r2, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_regset_free(drp, r3); + dt_regset_free(drp, r2); + + return (r1); +} + +static void +dt_cg_store(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp, dt_node_t *dst) +{ + ctf_encoding_t e; + dif_instr_t instr; + size_t size; + int reg; + + /* + * If we're loading a bit-field, the size of our store is found by + * rounding dst's cte_bits up to a byte boundary and then finding the + * nearest power of two to this value (see clp2(), above). 
+ */ + if ((dst->dn_flags & DT_NF_BITFIELD) && + ctf_type_encoding(dst->dn_ctfp, dst->dn_type, &e) != CTF_ERR) + size = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY); + else + size = dt_node_type_size(src); + + if (src->dn_flags & DT_NF_REF) { + reg = dt_regset_alloc(drp); + dt_cg_setx(dlp, reg, size); + instr = DIF_INSTR_COPYS(src->dn_reg, reg, dst->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, reg); + } else { + if (dst->dn_flags & DT_NF_BITFIELD) + reg = dt_cg_field_set(src, dlp, drp, dst); + else + reg = src->dn_reg; + + switch (size) { + case 1: + instr = DIF_INSTR_STORE(DIF_OP_STB, reg, dst->dn_reg); + break; + case 2: + instr = DIF_INSTR_STORE(DIF_OP_STH, reg, dst->dn_reg); + break; + case 4: + instr = DIF_INSTR_STORE(DIF_OP_STW, reg, dst->dn_reg); + break; + case 8: + instr = DIF_INSTR_STORE(DIF_OP_STX, reg, dst->dn_reg); + break; + default: + xyerror(D_UNKNOWN, "internal error -- cg cannot store " + "size %lu when passed by value\n", (ulong_t)size); + } + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + if (dst->dn_flags & DT_NF_BITFIELD) + dt_regset_free(drp, reg); + } +} + +/* + * Generate code for a typecast or for argument promotion from the type of the + * actual to the type of the formal. We need to generate code for casts when + * a scalar type is being narrowed or changing signed-ness. We first shift the + * desired bits high (losing excess bits if narrowing) and then shift them down + * using logical shift (unsigned result) or arithmetic shift (signed result). 
+ */ +static void +dt_cg_typecast(const dt_node_t *src, const dt_node_t *dst, + dt_irlist_t *dlp, dt_regset_t *drp) +{ + size_t srcsize = dt_node_type_size(src); + size_t dstsize = dt_node_type_size(dst); + + dif_instr_t instr; + int rg; + + if (!dt_node_is_scalar(dst)) + return; /* not a scalar */ + + /* + * Same width and same signed-ness: no code needs to be emitted. + * A same-width cast that flips signed-ness must fall through so the + * value is re-extended by the SLL/SRA or SLL/SRL pair below, which + * is why the XOR of the DT_NF_SIGNED bits is compared against zero. + */ + if (dstsize == srcsize && + ((src->dn_flags ^ dst->dn_flags) & DT_NF_SIGNED) == 0) + return; /* not narrowing or changing signed-ness */ + if (dstsize > srcsize && (src->dn_flags & DT_NF_SIGNED) == 0) + return; /* nothing to do in this case */ + + rg = dt_regset_alloc(drp); + + if (dstsize > srcsize) { + /* widening a signed source: n = bits above src, s = added bits */ + int n = sizeof (uint64_t) * NBBY - srcsize * NBBY; + int s = (dstsize - srcsize) * NBBY; + + dt_cg_setx(dlp, rg, n); + + instr = DIF_INSTR_FMT(DIF_OP_SLL, src->dn_reg, rg, dst->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + if ((dst->dn_flags & DT_NF_SIGNED) || n == s) { + instr = DIF_INSTR_FMT(DIF_OP_SRA, + dst->dn_reg, rg, dst->dn_reg); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + } else { + /* + * Unsigned destination narrower than 64 bits: + * sign-extend across the added bits, then shift + * down logically so the bits above dst are zero. + */ + dt_cg_setx(dlp, rg, s); + instr = DIF_INSTR_FMT(DIF_OP_SRA, + dst->dn_reg, rg, dst->dn_reg); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_cg_setx(dlp, rg, n - s); + instr = DIF_INSTR_FMT(DIF_OP_SRL, + dst->dn_reg, rg, dst->dn_reg); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + } + } else if (dstsize != sizeof (uint64_t)) { + /* narrowing or same-width sign change: re-extend from dst width */ + int n = sizeof (uint64_t) * NBBY - dstsize * NBBY; + + dt_cg_setx(dlp, rg, n); + + instr = DIF_INSTR_FMT(DIF_OP_SLL, src->dn_reg, rg, dst->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_FMT((dst->dn_flags & DT_NF_SIGNED) ? + DIF_OP_SRA : DIF_OP_SRL, dst->dn_reg, rg, dst->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + } + + dt_regset_free(drp, rg); +} + +/* + * Generate code to push the specified argument list on to the tuple stack. 
+ * We use this routine for handling subroutine calls and associative arrays. + * We must first generate code for all subexpressions before loading the stack + * because any subexpression could itself require the use of the tuple stack. + * This holds a number of registers equal to the number of arguments, but this + * is not a huge problem because the number of arguments can't exceed the + * number of tuple register stack elements anyway. At most one extra register + * is required (either by dt_cg_typecast() or for dtdt_size, below). This + * implies that a DIF implementation should offer a number of general purpose + * registers at least one greater than the number of tuple registers. + */ +static void +dt_cg_arglist(dt_ident_t *idp, dt_node_t *args, + dt_irlist_t *dlp, dt_regset_t *drp) +{ + const dt_idsig_t *isp = idp->di_data; + dt_node_t *dnp; + int i = 0; + + for (dnp = args; dnp != NULL; dnp = dnp->dn_list) + dt_cg_node(dnp, dlp, drp); + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, DIF_INSTR_FLUSHTS)); + + for (dnp = args; dnp != NULL; dnp = dnp->dn_list, i++) { + dtrace_diftype_t t; + dif_instr_t instr; + uint_t op; + int reg; + + dt_node_diftype(yypcb->pcb_hdl, dnp, &t); + + isp->dis_args[i].dn_reg = dnp->dn_reg; /* re-use register */ + dt_cg_typecast(dnp, &isp->dis_args[i], dlp, drp); + isp->dis_args[i].dn_reg = -1; + + if (t.dtdt_flags & DIF_TF_BYREF) { + op = DIF_OP_PUSHTR; + if (t.dtdt_size != 0) { + reg = dt_regset_alloc(drp); + dt_cg_setx(dlp, reg, t.dtdt_size); + } else { + reg = DIF_REG_R0; + } + } else { + op = DIF_OP_PUSHTV; + reg = DIF_REG_R0; + } + + instr = DIF_INSTR_PUSHTS(op, t.dtdt_kind, reg, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, dnp->dn_reg); + + if (reg != DIF_REG_R0) + dt_regset_free(drp, reg); + } + + if (i > yypcb->pcb_hdl->dt_conf.dtc_diftupregs) + longjmp(yypcb->pcb_jmpbuf, EDT_NOTUPREG); +} + +static void +dt_cg_arithmetic_op(dt_node_t *dnp, dt_irlist_t *dlp, + 
dt_regset_t *drp, uint_t op) +{ + int is_ptr_op = (dnp->dn_op == DT_TOK_ADD || dnp->dn_op == DT_TOK_SUB || + dnp->dn_op == DT_TOK_ADD_EQ || dnp->dn_op == DT_TOK_SUB_EQ); + + int lp_is_ptr = dt_node_is_pointer(dnp->dn_left); + int rp_is_ptr = dt_node_is_pointer(dnp->dn_right); + + dif_instr_t instr; + + if (lp_is_ptr && rp_is_ptr) { + assert(dnp->dn_op == DT_TOK_SUB); + is_ptr_op = 0; + } + + dt_cg_node(dnp->dn_left, dlp, drp); + if (is_ptr_op && rp_is_ptr) + dt_cg_ptrsize(dnp, dlp, drp, DIF_OP_MUL, dnp->dn_left->dn_reg); + + dt_cg_node(dnp->dn_right, dlp, drp); + if (is_ptr_op && lp_is_ptr) + dt_cg_ptrsize(dnp, dlp, drp, DIF_OP_MUL, dnp->dn_right->dn_reg); + + instr = DIF_INSTR_FMT(op, dnp->dn_left->dn_reg, + dnp->dn_right->dn_reg, dnp->dn_left->dn_reg); + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, dnp->dn_right->dn_reg); + dnp->dn_reg = dnp->dn_left->dn_reg; + + if (lp_is_ptr && rp_is_ptr) + dt_cg_ptrsize(dnp->dn_right, + dlp, drp, DIF_OP_UDIV, dnp->dn_reg); +} + +static uint_t +dt_cg_stvar(const dt_ident_t *idp) +{ + static const uint_t aops[] = { DIF_OP_STGAA, DIF_OP_STTAA, DIF_OP_NOP }; + static const uint_t sops[] = { DIF_OP_STGS, DIF_OP_STTS, DIF_OP_STLS }; + + uint_t i = (((idp->di_flags & DT_IDFLG_LOCAL) != 0) << 1) | + ((idp->di_flags & DT_IDFLG_TLS) != 0); + + return (idp->di_kind == DT_IDENT_ARRAY ? 
aops[i] : sops[i]); +} + +static void +dt_cg_prearith_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp, uint_t op) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + dif_instr_t instr; + ctf_id_t type; + ssize_t size = 1; + int reg; + + if (dt_node_is_pointer(dnp)) { + type = ctf_type_resolve(ctfp, dnp->dn_type); + assert(ctf_type_kind(ctfp, type) == CTF_K_POINTER); + size = ctf_type_size(ctfp, ctf_type_reference(ctfp, type)); + } + + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + + reg = dt_regset_alloc(drp); + dt_cg_setx(dlp, reg, size); + + instr = DIF_INSTR_FMT(op, dnp->dn_reg, reg, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, reg); + + /* + * If we are modifying a variable, generate an stv instruction from + * the variable specified by the identifier. If we are storing to a + * memory address, generate code again for the left-hand side using + * DT_NF_REF to get the address, and then generate a store to it. + * In both paths, we store the value in dnp->dn_reg (the new value). 
+ */ + if (dnp->dn_child->dn_kind == DT_NODE_VAR) { + dt_ident_t *idp = dt_ident_resolve(dnp->dn_child->dn_ident); + + idp->di_flags |= DT_IDFLG_DIFW; + instr = DIF_INSTR_STV(dt_cg_stvar(idp), + idp->di_id, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + } else { + uint_t rbit = dnp->dn_child->dn_flags & DT_NF_REF; + + assert(dnp->dn_child->dn_flags & DT_NF_WRITABLE); + assert(dnp->dn_child->dn_flags & DT_NF_LVALUE); + + dnp->dn_child->dn_flags |= DT_NF_REF; /* force pass-by-ref */ + dt_cg_node(dnp->dn_child, dlp, drp); + + dt_cg_store(dnp, dlp, drp, dnp->dn_child); + dt_regset_free(drp, dnp->dn_child->dn_reg); + + dnp->dn_left->dn_flags &= ~DT_NF_REF; + dnp->dn_left->dn_flags |= rbit; + } +} + +static void +dt_cg_postarith_op(dt_node_t *dnp, dt_irlist_t *dlp, + dt_regset_t *drp, uint_t op) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + dif_instr_t instr; + ctf_id_t type; + ssize_t size = 1; + int nreg; + + if (dt_node_is_pointer(dnp)) { + type = ctf_type_resolve(ctfp, dnp->dn_type); + assert(ctf_type_kind(ctfp, type) == CTF_K_POINTER); + size = ctf_type_size(ctfp, ctf_type_reference(ctfp, type)); + } + + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + + nreg = dt_regset_alloc(drp); + dt_cg_setx(dlp, nreg, size); + instr = DIF_INSTR_FMT(op, dnp->dn_reg, nreg, nreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + /* + * If we are modifying a variable, generate an stv instruction from + * the variable specified by the identifier. If we are storing to a + * memory address, generate code again for the left-hand side using + * DT_NF_REF to get the address, and then generate a store to it. + * In both paths, we store the value from 'nreg' (the new value). 
+ */ + if (dnp->dn_child->dn_kind == DT_NODE_VAR) { + dt_ident_t *idp = dt_ident_resolve(dnp->dn_child->dn_ident); + + idp->di_flags |= DT_IDFLG_DIFW; + instr = DIF_INSTR_STV(dt_cg_stvar(idp), idp->di_id, nreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + } else { + uint_t rbit = dnp->dn_child->dn_flags & DT_NF_REF; + int oreg = dnp->dn_reg; + + assert(dnp->dn_child->dn_flags & DT_NF_WRITABLE); + assert(dnp->dn_child->dn_flags & DT_NF_LVALUE); + + dnp->dn_child->dn_flags |= DT_NF_REF; /* force pass-by-ref */ + dt_cg_node(dnp->dn_child, dlp, drp); + + dnp->dn_reg = nreg; + dt_cg_store(dnp, dlp, drp, dnp->dn_child); + dnp->dn_reg = oreg; + + dt_regset_free(drp, dnp->dn_child->dn_reg); + dnp->dn_left->dn_flags &= ~DT_NF_REF; + dnp->dn_left->dn_flags |= rbit; + } + + dt_regset_free(drp, nreg); +} + +/* + * Determine if we should perform signed or unsigned comparison for an OP2. + * If both operands are of arithmetic type, perform the usual arithmetic + * conversions to determine the common real type for comparison [ISOC 6.5.8.3]. 
+ */ +static int +dt_cg_compare_signed(dt_node_t *dnp) +{ + dt_node_t dn; + + if (dt_node_is_string(dnp->dn_left) || + dt_node_is_string(dnp->dn_right)) + return (1); /* strings always compare signed */ + else if (!dt_node_is_arith(dnp->dn_left) || + !dt_node_is_arith(dnp->dn_right)) + return (0); /* non-arithmetic types always compare unsigned */ + + bzero(&dn, sizeof (dn)); + dt_node_promote(dnp->dn_left, dnp->dn_right, &dn); + return (dn.dn_flags & DT_NF_SIGNED); +} + +/* + * Generate code for a comparison: evaluate both operands, emit a CMP (or SCMP + * if either operand is a string), and branch on 'op'. If the branch is taken + * dnp->dn_reg is set to 1; otherwise DIF_REG_R0 is moved into it. The result + * re-uses the left operand's register and the right operand's is freed. + */ +static void +dt_cg_compare_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp, uint_t op) +{ + uint_t lbl_true = dt_irlist_label(dlp); + uint_t lbl_post = dt_irlist_label(dlp); + + dif_instr_t instr; + uint_t opc; + + dt_cg_node(dnp->dn_left, dlp, drp); + dt_cg_node(dnp->dn_right, dlp, drp); + + if (dt_node_is_string(dnp->dn_left) || dt_node_is_string(dnp->dn_right)) + opc = DIF_OP_SCMP; + else + opc = DIF_OP_CMP; + + instr = DIF_INSTR_CMP(opc, dnp->dn_left->dn_reg, dnp->dn_right->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, dnp->dn_right->dn_reg); + dnp->dn_reg = dnp->dn_left->dn_reg; + + instr = DIF_INSTR_BRANCH(op, lbl_true); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_xsetx(dlp, NULL, lbl_true, dnp->dn_reg, 1); + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP)); +} + +/* + * Code generation for the ternary op requires some trickery with the assembler + * in order to conserve registers. We generate code for dn_expr and dn_left + * and free their registers so they do not have to be consumed across codegen for + * dn_right. 
We insert a dummy MOV at the end of dn_left into the destination + * register, which is not yet known because we haven't done dn_right yet, and + * save the pointer to this instruction node. We then generate code for + * dn_right and use its register as our output. Finally, we reach back and + * patch the instruction for dn_left to move its output into this register. + */ +static void +dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + uint_t lbl_false = dt_irlist_label(dlp); + uint_t lbl_post = dt_irlist_label(dlp); + + dif_instr_t instr; + dt_irnode_t *dip; + + dt_cg_node(dnp->dn_expr, dlp, drp); + instr = DIF_INSTR_TST(dnp->dn_expr->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, dnp->dn_expr->dn_reg); + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_node(dnp->dn_left, dlp, drp); + instr = DIF_INSTR_MOV(dnp->dn_left->dn_reg, DIF_REG_R0); + dip = dt_cg_node_alloc(DT_LBL_NONE, instr); /* save dip for below */ + dt_irlist_append(dlp, dip); + dt_regset_free(drp, dnp->dn_left->dn_reg); + + instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_false, DIF_INSTR_NOP)); + dt_cg_node(dnp->dn_right, dlp, drp); + dnp->dn_reg = dnp->dn_right->dn_reg; + + /* + * Now that dn_reg is assigned, reach back and patch the correct MOV + * instruction into the tail of dn_left. We know dn_reg was unused + * at that point because otherwise dn_right couldn't have allocated it. 
+ */ + dip->di_instr = DIF_INSTR_MOV(dnp->dn_left->dn_reg, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP)); +} + +static void +dt_cg_logical_and(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + uint_t lbl_false = dt_irlist_label(dlp); + uint_t lbl_post = dt_irlist_label(dlp); + + dif_instr_t instr; + + dt_cg_node(dnp->dn_left, dlp, drp); + instr = DIF_INSTR_TST(dnp->dn_left->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, dnp->dn_left->dn_reg); + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_node(dnp->dn_right, dlp, drp); + instr = DIF_INSTR_TST(dnp->dn_right->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dnp->dn_reg = dnp->dn_right->dn_reg; + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, dnp->dn_reg, 1); + + instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_false, instr)); + + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP)); +} + +static void +dt_cg_logical_xor(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + uint_t lbl_next = dt_irlist_label(dlp); + uint_t lbl_tail = dt_irlist_label(dlp); + + dif_instr_t instr; + + dt_cg_node(dnp->dn_left, dlp, drp); + instr = DIF_INSTR_TST(dnp->dn_left->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_next); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_cg_setx(dlp, dnp->dn_left->dn_reg, 1); + + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_next, DIF_INSTR_NOP)); + dt_cg_node(dnp->dn_right, dlp, drp); + + instr = DIF_INSTR_TST(dnp->dn_right->dn_reg); + dt_irlist_append(dlp, 
dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_tail); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_cg_setx(dlp, dnp->dn_right->dn_reg, 1); + + instr = DIF_INSTR_FMT(DIF_OP_XOR, dnp->dn_left->dn_reg, + dnp->dn_right->dn_reg, dnp->dn_left->dn_reg); + + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_tail, instr)); + + dt_regset_free(drp, dnp->dn_right->dn_reg); + dnp->dn_reg = dnp->dn_left->dn_reg; +} + +static void +dt_cg_logical_or(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + uint_t lbl_true = dt_irlist_label(dlp); + uint_t lbl_false = dt_irlist_label(dlp); + uint_t lbl_post = dt_irlist_label(dlp); + + dif_instr_t instr; + + dt_cg_node(dnp->dn_left, dlp, drp); + instr = DIF_INSTR_TST(dnp->dn_left->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, dnp->dn_left->dn_reg); + + instr = DIF_INSTR_BRANCH(DIF_OP_BNE, lbl_true); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_node(dnp->dn_right, dlp, drp); + instr = DIF_INSTR_TST(dnp->dn_right->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dnp->dn_reg = dnp->dn_right->dn_reg; + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_xsetx(dlp, NULL, lbl_true, dnp->dn_reg, 1); + + instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_false, instr)); + + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP)); +} + +static void +dt_cg_logical_neg(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + uint_t lbl_zero = dt_irlist_label(dlp); + uint_t lbl_post = dt_irlist_label(dlp); + + dif_instr_t instr; + + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + + instr = DIF_INSTR_TST(dnp->dn_reg); + 
dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_zero); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_xsetx(dlp, NULL, lbl_zero, dnp->dn_reg, 1); + dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP)); +} + +static void +dt_cg_asgn_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + dif_instr_t instr; + dt_ident_t *idp; + + /* + * If we are performing a structure assignment of a translated type, + * we must instantiate all members and create a snapshot of the object + * in scratch space. We allocs a chunk of memory, generate code for + * each member, and then set dnp->dn_reg to the scratch object address. + */ + if ((idp = dt_node_resolve(dnp->dn_right, DT_IDENT_XLSOU)) != NULL) { + ctf_membinfo_t ctm; + dt_xlator_t *dxp = idp->di_data; + dt_node_t *mnp, dn, mn; + int r1, r2; + + /* + * Create two fake dt_node_t's representing operator "." and a + * right-hand identifier child node. These will be repeatedly + * modified according to each instantiated member so that we + * can pass them to dt_cg_store() and effect a member store. + */ + bzero(&dn, sizeof (dt_node_t)); + dn.dn_kind = DT_NODE_OP2; + dn.dn_op = DT_TOK_DOT; + dn.dn_left = dnp; + dn.dn_right = &mn; + + bzero(&mn, sizeof (dt_node_t)); + mn.dn_kind = DT_NODE_IDENT; + mn.dn_op = DT_TOK_IDENT; + + /* + * Allocate a register for our scratch data pointer. First we + * set it to the size of our data structure, and then replace + * it with the result of an allocs of the specified size. 
+ */ + r1 = dt_regset_alloc(drp); + dt_cg_setx(dlp, r1, + ctf_type_size(dxp->dx_dst_ctfp, dxp->dx_dst_base)); + + instr = DIF_INSTR_ALLOCS(r1, r1); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + /* + * When dt_cg_asgn_op() is called, we have already generated + * code for dnp->dn_right, which is the translator input. We + * now associate this register with the translator's input + * identifier so it can be referenced during our member loop. + */ + dxp->dx_ident->di_flags |= DT_IDFLG_CGREG; + dxp->dx_ident->di_id = dnp->dn_right->dn_reg; + + for (mnp = dxp->dx_members; mnp != NULL; mnp = mnp->dn_list) { + /* + * Generate code for the translator member expression, + * and then cast the result to the member type. + */ + dt_cg_node(mnp->dn_membexpr, dlp, drp); + mnp->dn_reg = mnp->dn_membexpr->dn_reg; + dt_cg_typecast(mnp->dn_membexpr, mnp, dlp, drp); + + /* + * Ask CTF for the offset of the member so we can store + * to the appropriate offset. This call has already + * been done once by the parser, so it should succeed. + */ + if (ctf_member_info(dxp->dx_dst_ctfp, dxp->dx_dst_base, + mnp->dn_membname, &ctm) == CTF_ERR) { + yypcb->pcb_hdl->dt_ctferr = + ctf_errno(dxp->dx_dst_ctfp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } + + /* + * If the destination member is at offset 0, store the + * result directly to r1 (the scratch buffer address). + * Otherwise allocate another temporary for the offset + * and add r1 to it before storing the result. + */ + if (ctm.ctm_offset != 0) { + r2 = dt_regset_alloc(drp); + + /* + * Add the member offset rounded down to the + * nearest byte. If the offset was not aligned + * on a byte boundary, this member is a bit- + * field and dt_cg_store() will handle masking. 
+ */ + dt_cg_setx(dlp, r2, ctm.ctm_offset / NBBY); + instr = DIF_INSTR_FMT(DIF_OP_ADD, r1, r2, r2); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_node_type_propagate(mnp, &dn); + dn.dn_right->dn_string = mnp->dn_membname; + dn.dn_reg = r2; + + dt_cg_store(mnp, dlp, drp, &dn); + dt_regset_free(drp, r2); + + } else { + dt_node_type_propagate(mnp, &dn); + dn.dn_right->dn_string = mnp->dn_membname; + dn.dn_reg = r1; + + dt_cg_store(mnp, dlp, drp, &dn); + } + + dt_regset_free(drp, mnp->dn_reg); + } + + dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG; + dxp->dx_ident->di_id = 0; + + if (dnp->dn_right->dn_reg != -1) + dt_regset_free(drp, dnp->dn_right->dn_reg); + + assert(dnp->dn_reg == dnp->dn_right->dn_reg); + dnp->dn_reg = r1; + } + + /* + * If we are storing to a variable, generate an stv instruction from + * the variable specified by the identifier. If we are storing to a + * memory address, generate code again for the left-hand side using + * DT_NF_REF to get the address, and then generate a store to it. + * In both paths, we assume dnp->dn_reg already has the new value. 
+ */ + if (dnp->dn_left->dn_kind == DT_NODE_VAR) { + idp = dt_ident_resolve(dnp->dn_left->dn_ident); + + if (idp->di_kind == DT_IDENT_ARRAY) + dt_cg_arglist(idp, dnp->dn_left->dn_args, dlp, drp); + + idp->di_flags |= DT_IDFLG_DIFW; + instr = DIF_INSTR_STV(dt_cg_stvar(idp), + idp->di_id, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + } else { + uint_t rbit = dnp->dn_left->dn_flags & DT_NF_REF; + + assert(dnp->dn_left->dn_flags & DT_NF_WRITABLE); + assert(dnp->dn_left->dn_flags & DT_NF_LVALUE); + + dnp->dn_left->dn_flags |= DT_NF_REF; /* force pass-by-ref */ + + dt_cg_node(dnp->dn_left, dlp, drp); + dt_cg_store(dnp, dlp, drp, dnp->dn_left); + dt_regset_free(drp, dnp->dn_left->dn_reg); + + dnp->dn_left->dn_flags &= ~DT_NF_REF; + dnp->dn_left->dn_flags |= rbit; + } +} + +static void +dt_cg_assoc_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + dif_instr_t instr; + uint_t op; + + assert(dnp->dn_kind == DT_NODE_VAR); + assert(!(dnp->dn_ident->di_flags & DT_IDFLG_LOCAL)); + assert(dnp->dn_args != NULL); + + dt_cg_arglist(dnp->dn_ident, dnp->dn_args, dlp, drp); + + dnp->dn_reg = dt_regset_alloc(drp); + + if (dnp->dn_ident->di_flags & DT_IDFLG_TLS) + op = DIF_OP_LDTAA; + else + op = DIF_OP_LDGAA; + + dnp->dn_ident->di_flags |= DT_IDFLG_DIFR; + instr = DIF_INSTR_LDV(op, dnp->dn_ident->di_id, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + /* + * If the associative array is a pass-by-reference type, then we are + * loading its value as a pointer to either load or store through it. + * The array element in question may not have been faulted in yet, in + * which case DIF_OP_LD*AA will return zero. We append an epilogue + * of instructions similar to the following: + * + * ld?aa id, %r1 ! base ld?aa instruction above + * tst %r1 ! 
start of epilogue + * +--- bne label + * | setx size, %r1 + * | allocs %r1, %r1 + * | st?aa id, %r1 + * | ld?aa id, %r1 + * v + * label: < rest of code > + * + * The idea is that we allocs a zero-filled chunk of scratch space and + * do a DIF_OP_ST*AA to fault in and initialize the array element, and + * then reload it to get the faulted-in address of the new variable + * storage. This isn't cheap, but pass-by-ref associative array values + * are (thus far) uncommon and the allocs cost only occurs once. If + * this path becomes important to DTrace users, we can improve things + * by adding a new DIF opcode to fault in associative array elements. + */ + if (dnp->dn_flags & DT_NF_REF) { + uint_t stvop = op == DIF_OP_LDTAA ? DIF_OP_STTAA : DIF_OP_STGAA; + uint_t label = dt_irlist_label(dlp); + + instr = DIF_INSTR_TST(dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_BRANCH(DIF_OP_BNE, label); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_cg_setx(dlp, dnp->dn_reg, dt_node_type_size(dnp)); + instr = DIF_INSTR_ALLOCS(dnp->dn_reg, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dnp->dn_ident->di_flags |= DT_IDFLG_DIFW; + instr = DIF_INSTR_STV(stvop, dnp->dn_ident->di_id, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_LDV(op, dnp->dn_ident->di_id, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dt_irlist_append(dlp, dt_cg_node_alloc(label, DIF_INSTR_NOP)); + } +} + +static void +dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + dt_probe_t *prp = yypcb->pcb_probe; + uintmax_t saved = dnp->dn_args->dn_value; + dt_ident_t *idp = dnp->dn_ident; + + dif_instr_t instr; + uint_t op; + size_t size; + int reg, n; + + assert(dnp->dn_kind == DT_NODE_VAR); + assert(!(idp->di_flags & DT_IDFLG_LOCAL)); + + assert(dnp->dn_args->dn_kind == DT_NODE_INT); + 
assert(dnp->dn_args->dn_list == NULL); + + /* + * If this is a reference in the args[] array, temporarily modify the + * array index according to the static argument mapping (if any), + * unless the argument reference is provided by a dynamic translator. + * If we're using a dynamic translator for args[], then just set dn_reg + * to an invalid reg and return: DIF_OP_XLARG will fetch the arg later. + */ + if (idp->di_id == DIF_VAR_ARGS) { + if ((idp->di_kind == DT_IDENT_XLPTR || + idp->di_kind == DT_IDENT_XLSOU) && + dt_xlator_dynamic(idp->di_data)) { + dnp->dn_reg = -1; + return; + } + dnp->dn_args->dn_value = prp->pr_mapping[saved]; + } + + dt_cg_node(dnp->dn_args, dlp, drp); + dnp->dn_args->dn_value = saved; + + dnp->dn_reg = dnp->dn_args->dn_reg; + + if (idp->di_flags & DT_IDFLG_TLS) + op = DIF_OP_LDTA; + else + op = DIF_OP_LDGA; + + idp->di_flags |= DT_IDFLG_DIFR; + + instr = DIF_INSTR_LDA(op, idp->di_id, + dnp->dn_args->dn_reg, dnp->dn_reg); + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + /* + * If this is a reference to the args[] array, we need to take the + * additional step of explicitly eliminating any bits larger than the + * type size: the DIF interpreter in the kernel will always give us + * the raw (64-bit) argument value, and any bits larger than the type + * size may be junk. As a practical matter, this arises only on 64-bit + * architectures and only when the argument index is larger than the + * number of arguments passed directly to DTrace: if an 8-, 16- or + * 32-bit argument must be retrieved from the stack, it is possible + * (and in some cases, likely) that the upper bits will be garbage. 
+ */ + if (idp->di_id != DIF_VAR_ARGS || !dt_node_is_scalar(dnp)) + return; + + if ((size = dt_node_type_size(dnp)) == sizeof (uint64_t)) + return; + + reg = dt_regset_alloc(drp); + assert(size < sizeof (uint64_t)); + n = sizeof (uint64_t) * NBBY - size * NBBY; + + dt_cg_setx(dlp, reg, n); + + instr = DIF_INSTR_FMT(DIF_OP_SLL, dnp->dn_reg, reg, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_FMT((dnp->dn_flags & DT_NF_SIGNED) ? + DIF_OP_SRA : DIF_OP_SRL, dnp->dn_reg, reg, dnp->dn_reg); + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, reg); +} + +/* + * Generate code for an inlined variable reference. Inlines can be used to + * define either scalar or associative array substitutions. For scalars, we + * simply generate code for the parse tree saved in the identifier's din_root, + * and then cast the resulting expression to the inline's declaration type. + * For arrays, we take the input parameter subtrees from dnp->dn_args and + * temporarily store them in the din_root of each din_argv[i] identifier, + * which are themselves inlines and were set up for us by the parser. The + * result is that any reference to the inlined parameter inside the top-level + * din_root will turn into a recursive call to dt_cg_inline() for a scalar + * inline whose din_root will refer to the subtree pointed to by the argument. 
+ */ +static void +dt_cg_inline(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + dt_ident_t *idp = dnp->dn_ident; + dt_idnode_t *inp = idp->di_iarg; + + dt_idnode_t *pinp; + dt_node_t *pnp; + int i; + + assert(idp->di_flags & DT_IDFLG_INLINE); + assert(idp->di_ops == &dt_idops_inline); + + /* + * For an array inline, point each parameter identifier's din_root at + * the matching argument subtree for the duration of code generation. + */ + if (idp->di_kind == DT_IDENT_ARRAY) { + for (i = 0, pnp = dnp->dn_args; + pnp != NULL; pnp = pnp->dn_list, i++) { + if (inp->din_argv[i] != NULL) { + pinp = inp->din_argv[i]->di_iarg; + pinp->din_root = pnp; + } + } + } + + dt_cg_node(inp->din_root, dlp, drp); + dnp->dn_reg = inp->din_root->dn_reg; + dt_cg_typecast(inp->din_root, dnp, dlp, drp); + + if (idp->di_kind == DT_IDENT_ARRAY) { + for (i = 0; i < inp->din_argc; i++) { + /* + * Mirror the NULL check used when the roots were + * installed above: a slot with no identifier has + * nothing to reset and must not be dereferenced. + */ + if (inp->din_argv[i] == NULL) + continue; + pinp = inp->din_argv[i]->di_iarg; + pinp->din_root = NULL; + } + } +} + +static void +dt_cg_func_typeref(dtrace_hdl_t *dtp, dt_node_t *dnp) +{ + dtrace_typeinfo_t dtt; + dt_node_t *addr = dnp->dn_args; + dt_node_t *nelm = addr->dn_list; + dt_node_t *strp = nelm->dn_list; + dt_node_t *typs = strp->dn_list; + char buf[DT_TYPE_NAMELEN]; + char *p; + + ctf_type_name(addr->dn_ctfp, addr->dn_type, buf, sizeof (buf)); + + /* + * XXX Hack alert! XXX + * The prototype has two dummy args that we munge to represent + * the type string and the type size. + * + * Yes, I hear your grumble, but it works for now. We'll come + * up with a more elegant implementation later. 
:-) + */ + free(strp->dn_string); + + if ((p = strchr(buf, '*')) != NULL) + *p = '\0'; + + strp->dn_string = strdup(buf); + + if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_EVERY, buf, &dtt) < 0) + return; + + typs->dn_value = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type); +} + +typedef struct dt_xlmemb { + dt_ident_t *dtxl_idp; /* translated ident */ + dt_irlist_t *dtxl_dlp; /* instruction list */ + dt_regset_t *dtxl_drp; /* register set */ + int dtxl_sreg; /* location of the translation input */ + int dtxl_dreg; /* location of our allocated buffer */ +} dt_xlmemb_t; + +/*ARGSUSED*/ +static int +dt_cg_xlate_member(const char *name, ctf_id_t type, ulong_t off, void *arg) +{ + dt_xlmemb_t *dx = arg; + dt_ident_t *idp = dx->dtxl_idp; + dt_irlist_t *dlp = dx->dtxl_dlp; + dt_regset_t *drp = dx->dtxl_drp; + + dt_node_t *mnp; + dt_xlator_t *dxp; + + int reg, treg; + uint32_t instr; + size_t size; + + /* Generate code for the translation. */ + dxp = idp->di_data; + mnp = dt_xlator_member(dxp, name); + + /* If there's no translator for the given member, skip it. */ + if (mnp == NULL) + return (0); + + dxp->dx_ident->di_flags |= DT_IDFLG_CGREG; + dxp->dx_ident->di_id = dx->dtxl_sreg; + + dt_cg_node(mnp->dn_membexpr, dlp, drp); + + dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG; + dxp->dx_ident->di_id = 0; + + treg = mnp->dn_membexpr->dn_reg; + + /* Compute the offset into our buffer and store the result there. */ + reg = dt_regset_alloc(drp); + + dt_cg_setx(dlp, reg, off / NBBY); + instr = DIF_INSTR_FMT(DIF_OP_ADD, dx->dtxl_dreg, reg, reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + size = ctf_type_size(mnp->dn_membexpr->dn_ctfp, + mnp->dn_membexpr->dn_type); + if (dt_node_is_scalar(mnp->dn_membexpr)) { + /* + * Copying scalars is simple. 
+ */ + switch (size) { + case 1: + instr = DIF_INSTR_STORE(DIF_OP_STB, treg, reg); + break; + case 2: + instr = DIF_INSTR_STORE(DIF_OP_STH, treg, reg); + break; + case 4: + instr = DIF_INSTR_STORE(DIF_OP_STW, treg, reg); + break; + case 8: + instr = DIF_INSTR_STORE(DIF_OP_STX, treg, reg); + break; + default: + xyerror(D_UNKNOWN, "internal error -- unexpected " + "size: %lu\n", (ulong_t)size); + } + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + } else if (dt_node_is_string(mnp->dn_membexpr)) { + int szreg; + + /* + * Use the copys instruction for strings. + */ + szreg = dt_regset_alloc(drp); + dt_cg_setx(dlp, szreg, size); + instr = DIF_INSTR_COPYS(treg, szreg, reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, szreg); + } else { + int szreg; + + /* + * If it's anything else then we'll just bcopy it. + */ + szreg = dt_regset_alloc(drp); + dt_cg_setx(dlp, szreg, size); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, DIF_INSTR_FLUSHTS)); + instr = DIF_INSTR_PUSHTS(DIF_OP_PUSHTV, DIF_TYPE_CTF, + DIF_REG_R0, treg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + instr = DIF_INSTR_PUSHTS(DIF_OP_PUSHTV, DIF_TYPE_CTF, + DIF_REG_R0, reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + instr = DIF_INSTR_PUSHTS(DIF_OP_PUSHTV, DIF_TYPE_CTF, + DIF_REG_R0, szreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + instr = DIF_INSTR_CALL(DIF_SUBR_BCOPY, szreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, szreg); + } + + dt_regset_free(drp, reg); + dt_regset_free(drp, treg); + + return (0); +} + +/* + * If we're expanding a translated type, we create an appropriately sized + * buffer with alloca() and then translate each member into it. 
+ */ +static int +dt_cg_xlate_expand(dt_node_t *dnp, dt_ident_t *idp, dt_irlist_t *dlp, + dt_regset_t *drp) +{ + dt_xlmemb_t dlm; + uint32_t instr; + int dreg; + size_t size; + + dreg = dt_regset_alloc(drp); + size = ctf_type_size(dnp->dn_ident->di_ctfp, dnp->dn_ident->di_type); + + /* Call alloca() to create the buffer. */ + dt_cg_setx(dlp, dreg, size); + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, DIF_INSTR_FLUSHTS)); + + instr = DIF_INSTR_PUSHTS(DIF_OP_PUSHTV, DIF_TYPE_CTF, DIF_REG_R0, dreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + instr = DIF_INSTR_CALL(DIF_SUBR_ALLOCA, dreg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + + /* Generate the translation for each member. */ + dlm.dtxl_idp = idp; + dlm.dtxl_dlp = dlp; + dlm.dtxl_drp = drp; + dlm.dtxl_sreg = dnp->dn_reg; + dlm.dtxl_dreg = dreg; + (void) ctf_member_iter(dnp->dn_ident->di_ctfp, + dnp->dn_ident->di_type, dt_cg_xlate_member, + &dlm); + + return (dreg); +} + +static void +dt_cg_node(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + ctf_file_t *octfp; + ctf_membinfo_t m; + ctf_id_t type; + + dif_instr_t instr; + dt_ident_t *idp; + ssize_t stroff; + uint_t op; + + switch (dnp->dn_op) { + case DT_TOK_COMMA: + dt_cg_node(dnp->dn_left, dlp, drp); + dt_regset_free(drp, dnp->dn_left->dn_reg); + dt_cg_node(dnp->dn_right, dlp, drp); + dnp->dn_reg = dnp->dn_right->dn_reg; + break; + + case DT_TOK_ASGN: + dt_cg_node(dnp->dn_right, dlp, drp); + dnp->dn_reg = dnp->dn_right->dn_reg; + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_ADD_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_ADD); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_SUB_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SUB); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_MUL_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_MUL); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_DIV_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, 
+ (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SDIV : DIF_OP_UDIV); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_MOD_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, + (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SREM : DIF_OP_UREM); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_AND_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_AND); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_XOR_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_XOR); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_OR_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_OR); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_LSH_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SLL); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_RSH_EQ: + dt_cg_arithmetic_op(dnp, dlp, drp, + (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SRA : DIF_OP_SRL); + dt_cg_asgn_op(dnp, dlp, drp); + break; + + case DT_TOK_QUESTION: + dt_cg_ternary_op(dnp, dlp, drp); + break; + + case DT_TOK_LOR: + dt_cg_logical_or(dnp, dlp, drp); + break; + + case DT_TOK_LXOR: + dt_cg_logical_xor(dnp, dlp, drp); + break; + + case DT_TOK_LAND: + dt_cg_logical_and(dnp, dlp, drp); + break; + + case DT_TOK_BOR: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_OR); + break; + + case DT_TOK_XOR: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_XOR); + break; + + case DT_TOK_BAND: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_AND); + break; + + case DT_TOK_EQU: + dt_cg_compare_op(dnp, dlp, drp, DIF_OP_BE); + break; + + case DT_TOK_NEQ: + dt_cg_compare_op(dnp, dlp, drp, DIF_OP_BNE); + break; + + case DT_TOK_LT: + dt_cg_compare_op(dnp, dlp, drp, + dt_cg_compare_signed(dnp) ? DIF_OP_BL : DIF_OP_BLU); + break; + + case DT_TOK_LE: + dt_cg_compare_op(dnp, dlp, drp, + dt_cg_compare_signed(dnp) ? DIF_OP_BLE : DIF_OP_BLEU); + break; + + case DT_TOK_GT: + dt_cg_compare_op(dnp, dlp, drp, + dt_cg_compare_signed(dnp) ? DIF_OP_BG : DIF_OP_BGU); + break; + + case DT_TOK_GE: + dt_cg_compare_op(dnp, dlp, drp, + dt_cg_compare_signed(dnp) ? 
DIF_OP_BGE : DIF_OP_BGEU); + break; + + case DT_TOK_LSH: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SLL); + break; + + case DT_TOK_RSH: + dt_cg_arithmetic_op(dnp, dlp, drp, + (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SRA : DIF_OP_SRL); + break; + + case DT_TOK_ADD: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_ADD); + break; + + case DT_TOK_SUB: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SUB); + break; + + case DT_TOK_MUL: + dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_MUL); + break; + + case DT_TOK_DIV: + dt_cg_arithmetic_op(dnp, dlp, drp, + (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SDIV : DIF_OP_UDIV); + break; + + case DT_TOK_MOD: + dt_cg_arithmetic_op(dnp, dlp, drp, + (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SREM : DIF_OP_UREM); + break; + + case DT_TOK_LNEG: + dt_cg_logical_neg(dnp, dlp, drp); + break; + + case DT_TOK_BNEG: + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + instr = DIF_INSTR_NOT(dnp->dn_reg, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + break; + + case DT_TOK_PREINC: + dt_cg_prearith_op(dnp, dlp, drp, DIF_OP_ADD); + break; + + case DT_TOK_POSTINC: + dt_cg_postarith_op(dnp, dlp, drp, DIF_OP_ADD); + break; + + case DT_TOK_PREDEC: + dt_cg_prearith_op(dnp, dlp, drp, DIF_OP_SUB); + break; + + case DT_TOK_POSTDEC: + dt_cg_postarith_op(dnp, dlp, drp, DIF_OP_SUB); + break; + + case DT_TOK_IPOS: + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + break; + + case DT_TOK_INEG: + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + + instr = DIF_INSTR_FMT(DIF_OP_SUB, DIF_REG_R0, + dnp->dn_reg, dnp->dn_reg); + + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + break; + + case DT_TOK_DEREF: + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + + if (dt_node_is_dynamic(dnp->dn_child)) { + int reg; + idp = dt_node_resolve(dnp->dn_child, DT_IDENT_XLPTR); + assert(idp != NULL); + reg = dt_cg_xlate_expand(dnp, idp, 
dlp, drp); + + dt_regset_free(drp, dnp->dn_child->dn_reg); + dnp->dn_reg = reg; + + } else if (!(dnp->dn_flags & DT_NF_REF)) { + uint_t ubit = dnp->dn_flags & DT_NF_USERLAND; + + /* + * Save and restore DT_NF_USERLAND across dt_cg_load(): + * we need the sign bit from dnp and the user bit from + * dnp->dn_child in order to get the proper opcode. + */ + dnp->dn_flags |= + (dnp->dn_child->dn_flags & DT_NF_USERLAND); + + instr = DIF_INSTR_LOAD(dt_cg_load(dnp, ctfp, + dnp->dn_type), dnp->dn_reg, dnp->dn_reg); + + dnp->dn_flags &= ~DT_NF_USERLAND; + dnp->dn_flags |= ubit; + + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + } + break; + + case DT_TOK_ADDROF: { + uint_t rbit = dnp->dn_child->dn_flags & DT_NF_REF; + + dnp->dn_child->dn_flags |= DT_NF_REF; /* force pass-by-ref */ + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + + dnp->dn_child->dn_flags &= ~DT_NF_REF; + dnp->dn_child->dn_flags |= rbit; + break; + } + + case DT_TOK_SIZEOF: { + size_t size = dt_node_sizeof(dnp->dn_child); + dnp->dn_reg = dt_regset_alloc(drp); + assert(size != 0); + dt_cg_setx(dlp, dnp->dn_reg, size); + break; + } + + case DT_TOK_STRINGOF: + dt_cg_node(dnp->dn_child, dlp, drp); + dnp->dn_reg = dnp->dn_child->dn_reg; + break; + + case DT_TOK_XLATE: + /* + * An xlate operator appears in either an XLATOR, indicating a + * reference to a dynamic translator, or an OP2, indicating + * use of the xlate operator in the user's program. For the + * dynamic case, generate an xlate opcode with a reference to + * the corresponding member, pre-computed for us in dn_members. 
+ */ + if (dnp->dn_kind == DT_NODE_XLATOR) { + dt_xlator_t *dxp = dnp->dn_xlator; + + assert(dxp->dx_ident->di_flags & DT_IDFLG_CGREG); + assert(dxp->dx_ident->di_id != 0); + + dnp->dn_reg = dt_regset_alloc(drp); + + if (dxp->dx_arg == -1) { + instr = DIF_INSTR_MOV( + dxp->dx_ident->di_id, dnp->dn_reg); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + op = DIF_OP_XLATE; + } else + op = DIF_OP_XLARG; + + instr = DIF_INSTR_XLATE(op, 0, dnp->dn_reg); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + + dlp->dl_last->di_extern = dnp->dn_xmember; + break; + } + + assert(dnp->dn_kind == DT_NODE_OP2); + dt_cg_node(dnp->dn_right, dlp, drp); + dnp->dn_reg = dnp->dn_right->dn_reg; + break; + + case DT_TOK_LPAR: + dt_cg_node(dnp->dn_right, dlp, drp); + dnp->dn_reg = dnp->dn_right->dn_reg; + dt_cg_typecast(dnp->dn_right, dnp, dlp, drp); + break; + + case DT_TOK_PTR: + case DT_TOK_DOT: + assert(dnp->dn_right->dn_kind == DT_NODE_IDENT); + dt_cg_node(dnp->dn_left, dlp, drp); + + /* + * If the left-hand side of PTR or DOT is a dynamic variable, + * we expect it to be the output of a D translator. In this + * case, we look up the parse tree corresponding to the member + * that is being accessed and run the code generator over it. + * We then cast the result as if by the assignment operator. 
+ */ + if ((idp = dt_node_resolve( + dnp->dn_left, DT_IDENT_XLSOU)) != NULL || + (idp = dt_node_resolve( + dnp->dn_left, DT_IDENT_XLPTR)) != NULL) { + + dt_xlator_t *dxp; + dt_node_t *mnp; + + dxp = idp->di_data; + mnp = dt_xlator_member(dxp, dnp->dn_right->dn_string); + assert(mnp != NULL); + + dxp->dx_ident->di_flags |= DT_IDFLG_CGREG; + dxp->dx_ident->di_id = dnp->dn_left->dn_reg; + + dt_cg_node(mnp->dn_membexpr, dlp, drp); + dnp->dn_reg = mnp->dn_membexpr->dn_reg; + dt_cg_typecast(mnp->dn_membexpr, dnp, dlp, drp); + + dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG; + dxp->dx_ident->di_id = 0; + + if (dnp->dn_left->dn_reg != -1) + dt_regset_free(drp, dnp->dn_left->dn_reg); + break; + } + + ctfp = dnp->dn_left->dn_ctfp; + type = ctf_type_resolve(ctfp, dnp->dn_left->dn_type); + + if (dnp->dn_op == DT_TOK_PTR) { + type = ctf_type_reference(ctfp, type); + type = ctf_type_resolve(ctfp, type); + } + + if ((ctfp = dt_cg_membinfo(octfp = ctfp, type, + dnp->dn_right->dn_string, &m)) == NULL) { + yypcb->pcb_hdl->dt_ctferr = ctf_errno(octfp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } + + if (m.ctm_offset != 0) { + int reg; + + reg = dt_regset_alloc(drp); + + /* + * If the offset is not aligned on a byte boundary, it + * is a bit-field member and we will extract the value + * bits below after we generate the appropriate load. + */ + dt_cg_setx(dlp, reg, m.ctm_offset / NBBY); + + instr = DIF_INSTR_FMT(DIF_OP_ADD, + dnp->dn_left->dn_reg, reg, dnp->dn_left->dn_reg); + + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + dt_regset_free(drp, reg); + } + + if (!(dnp->dn_flags & DT_NF_REF)) { + uint_t ubit = dnp->dn_flags & DT_NF_USERLAND; + + /* + * Save and restore DT_NF_USERLAND across dt_cg_load(): + * we need the sign bit from dnp and the user bit from + * dnp->dn_left in order to get the proper opcode. 
+ */ + dnp->dn_flags |= + (dnp->dn_left->dn_flags & DT_NF_USERLAND); + + instr = DIF_INSTR_LOAD(dt_cg_load(dnp, + ctfp, m.ctm_type), dnp->dn_left->dn_reg, + dnp->dn_left->dn_reg); + + dnp->dn_flags &= ~DT_NF_USERLAND; + dnp->dn_flags |= ubit; + + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + + if (dnp->dn_flags & DT_NF_BITFIELD) + dt_cg_field_get(dnp, dlp, drp, ctfp, &m); + } + + dnp->dn_reg = dnp->dn_left->dn_reg; + break; + + case DT_TOK_STRING: + dnp->dn_reg = dt_regset_alloc(drp); + + assert(dnp->dn_kind == DT_NODE_STRING); + stroff = dt_strtab_insert(yypcb->pcb_strtab, dnp->dn_string); + + if (stroff == -1L) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + if (stroff > DIF_STROFF_MAX) + longjmp(yypcb->pcb_jmpbuf, EDT_STR2BIG); + + instr = DIF_INSTR_SETS((ulong_t)stroff, dnp->dn_reg); + dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr)); + break; + + case DT_TOK_IDENT: + /* + * If the specified identifier is a variable on which we have + * set the code generator register flag, then this variable + * has already had code generated for it and saved in di_id. + * Allocate a new register and copy the existing value to it. + */ + if (dnp->dn_kind == DT_NODE_VAR && + (dnp->dn_ident->di_flags & DT_IDFLG_CGREG)) { + dnp->dn_reg = dt_regset_alloc(drp); + instr = DIF_INSTR_MOV(dnp->dn_ident->di_id, + dnp->dn_reg); + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + break; + } + + /* + * Identifiers can represent function calls, variable refs, or + * symbols. First we check for inlined variables, and handle + * them by generating code for the inline parse tree. 
+ */ + if (dnp->dn_kind == DT_NODE_VAR && + (dnp->dn_ident->di_flags & DT_IDFLG_INLINE)) { + dt_cg_inline(dnp, dlp, drp); + break; + } + + switch (dnp->dn_kind) { + case DT_NODE_FUNC: { + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + if ((idp = dnp->dn_ident)->di_kind != DT_IDENT_FUNC) { + dnerror(dnp, D_CG_EXPR, "%s %s( ) may not be " + "called from a D expression (D program " + "context required)\n", + dt_idkind_name(idp->di_kind), idp->di_name); + } + + switch (idp->di_id) { + case DIF_SUBR_TYPEREF: + dt_cg_func_typeref(dtp, dnp); + break; + + default: + break; + } + + dt_cg_arglist(dnp->dn_ident, dnp->dn_args, dlp, drp); + + dnp->dn_reg = dt_regset_alloc(drp); + instr = DIF_INSTR_CALL(dnp->dn_ident->di_id, + dnp->dn_reg); + + dt_irlist_append(dlp, + dt_cg_node_alloc(DT_LBL_NONE, instr)); + + break; + } + + case DT_NODE_VAR: + if (dnp->dn_ident->di_kind == DT_IDENT_XLSOU || + dnp->dn_ident->di_kind == DT_IDENT_XLPTR) { + /* + * This can only happen if we have translated + * args[]. See dt_idcook_args() for details. 
/*
 * Code-generation entry point: reset the per-compilation state held in the
 * pcb (register set, integer table, string table, IR list), generate
 * instructions for the parse tree rooted at dnp, and terminate the list with
 * a RET of the register holding the result.  Allocation failures are
 * reported by longjmp() to pcb->pcb_jmpbuf with EDT_NOMEM.
 */
void
dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
{
	dif_instr_t instr;
	dt_xlator_t *dxp;
	dt_ident_t *idp;

	/* The register set is created once and then reused across calls. */
	if (pcb->pcb_regs == NULL && (pcb->pcb_regs =
	    dt_regset_create(pcb->pcb_hdl->dt_conf.dtc_difintregs)) == NULL)
		longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);

	dt_regset_reset(pcb->pcb_regs);
	(void) dt_regset_alloc(pcb->pcb_regs); /* allocate %r0 */

	/* The integer and string tables are rebuilt from scratch each time. */
	if (pcb->pcb_inttab != NULL)
		dt_inttab_destroy(pcb->pcb_inttab);

	/*
	 * NOTE(review): this uses yypcb->pcb_hdl whereas the rest of the
	 * function uses pcb -- presumably the two are the same here; confirm.
	 */
	if ((pcb->pcb_inttab = dt_inttab_create(yypcb->pcb_hdl)) == NULL)
		longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);

	if (pcb->pcb_strtab != NULL)
		dt_strtab_destroy(pcb->pcb_strtab);

	if ((pcb->pcb_strtab = dt_strtab_create(BUFSIZ)) == NULL)
		longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);

	dt_irlist_destroy(&pcb->pcb_ir);
	dt_irlist_create(&pcb->pcb_ir);

	assert(pcb->pcb_dret == NULL);
	pcb->pcb_dret = dnp;

	if (dt_node_resolve(dnp, DT_IDENT_XLPTR) != NULL) {
		dnerror(dnp, D_CG_DYN, "expression cannot evaluate to result "
		    "of a translated pointer\n");
	}

	/*
	 * If we're generating code for a translator body, assign the input
	 * parameter to the first available register (i.e. caller passes %r1).
	 */
	if (dnp->dn_kind == DT_NODE_MEMBER) {
		dxp = dnp->dn_membxlator;
		dnp = dnp->dn_membexpr;

		dxp->dx_ident->di_flags |= DT_IDFLG_CGREG;
		dxp->dx_ident->di_id = dt_regset_alloc(pcb->pcb_regs);
	}

	dt_cg_node(dnp, &pcb->pcb_ir, pcb->pcb_regs);

	/*
	 * A result that resolves to a translated struct/union identifier is
	 * expanded into a buffer; that buffer's register replaces the
	 * node's own.
	 */
	if ((idp = dt_node_resolve(dnp, DT_IDENT_XLSOU)) != NULL) {
		int reg = dt_cg_xlate_expand(dnp, idp,
		    &pcb->pcb_ir, pcb->pcb_regs);
		dt_regset_free(pcb->pcb_regs, dnp->dn_reg);
		dnp->dn_reg = reg;
	}

	/* The RET is encoded before its register is handed back. */
	instr = DIF_INSTR_RET(dnp->dn_reg);
	dt_regset_free(pcb->pcb_regs, dnp->dn_reg);
	dt_irlist_append(&pcb->pcb_ir, dt_cg_node_alloc(DT_LBL_NONE, instr));

	/*
	 * NOTE(review): dnp was reassigned to dn_membexpr in the translator
	 * case above, so this test examines the member expression rather
	 * than the original node.  If that expression is never itself a
	 * DT_NODE_MEMBER, this cleanup (and the use of dxp) is never reached
	 * -- confirm whether the original node kind should be remembered.
	 */
	if (dnp->dn_kind == DT_NODE_MEMBER) {
		dt_regset_free(pcb->pcb_regs, dxp->dx_ident->di_id);
		dxp->dx_ident->di_id = 0;
		dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG;
	}

	/* Release %r0 and check that nothing else is still allocated. */
	dt_regset_free(pcb->pcb_regs, 0);
	dt_regset_assert_free(pcb->pcb_regs);
}
#define	DT_MASK_LO 0x00000000FFFFFFFFULL

/*
 * We declare this here because (1) we need it and (2) we want to avoid a
 * dependency on libm in libdtrace.
 */
static long double
dt_fabsl(long double x)
{
	if (x < 0)
		return (-x);

	return (x);
}

/*
 * Return the number of characters needed to print val in decimal, counting
 * a leading '-' for negative values.  The result is never less than 4.
 *
 * The magnitude is taken in unsigned arithmetic (so the most negative value
 * is safe) and digits are counted by repeated division, which cannot
 * overflow and counts exact powers of ten correctly (10000 is 5 digits).
 */
static int
dt_ndigits(long long val)
{
	int rval = 1;
	unsigned long long mag;

	if (val < 0) {
		mag = 0ULL - (unsigned long long)val;
		rval++;
	} else {
		mag = (unsigned long long)val;
	}

	while (mag >= 10) {
		rval++;
		mag /= 10;
	}

	return (rval < 4 ? 4 : rval);
}

/*
 * 128-bit arithmetic functions needed to support the stddev() aggregating
 * action.  A 128-bit value is stored as two uint64_t words: element 0 is the
 * low-order word and element 1 is the high-order word.
 */

/* Return non-zero if a > b. */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	return (a[1] > b[1] || (a[1] == b[1] && a[0] > b[0]));
}

/* Return non-zero if a >= b. */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	return (a[1] > b[1] || (a[1] == b[1] && a[0] >= b[0]));
}

/* Return non-zero if a <= b. */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	return (a[1] < b[1] || (a[1] == b[1] && a[0] <= b[0]));
}

/*
 * Shift the 128-bit value in a by b.  If b is positive, shift left.
 * If b is negative, shift right.  Shifting by 128 bits or more in either
 * direction yields zero.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	unsigned int n;

	if (b == 0)
		return;

	/* Take the magnitude in unsigned arithmetic so INT_MIN is safe. */
	n = (b < 0) ? 0U - (unsigned int)b : (unsigned int)b;

	if (n >= 128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		if (n >= 64) {
			a[0] = a[1] >> (n - 64);
			a[1] = 0;
		} else {
			/*
			 * The low n bits of the high word become the top n
			 * bits of the low word.  The left shift discards
			 * everything else, so no mask is required (n is in
			 * 1..63 here, so both shift counts are valid).
			 */
			a[0] = (a[0] >> n) | (a[1] << (64 - n));
			a[1] >>= n;
		}
	} else {
		if (n >= 64) {
			a[1] = a[0] << (n - 64);
			a[0] = 0;
		} else {
			a[1] = (a[1] << n) | (a[0] >> (64 - n));
			a[0] <<= n;
		}
	}
}

/*
 * Return the zero-based index of the highest set bit in a (0 when a is
 * 0 or 1).
 */
static int
dt_nbits_128(uint64_t *a)
{
	int nbits = 0;
	uint64_t tmp[2];
	uint64_t zero[2] = { 0, 0 };

	tmp[0] = a[0];
	tmp[1] = a[1];

	dt_shift_128(tmp, -1);
	while (dt_gt_128(tmp, zero)) {
		dt_shift_128(tmp, -1);
		nbits++;
	}

	return (nbits);
}

/*
 * difference = minuend - subtrahend (modulo 2^128).  The result is built in
 * a temporary so that difference may alias either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	uint64_t result[2];

	result[0] = minuend[0] - subtrahend[0];
	result[1] = minuend[1] - subtrahend[1] -
	    (minuend[0] < subtrahend[0] ? 1 : 0);

	difference[0] = result[0];
	difference[1] = result[1];
}

/*
 * sum = addend1 + addend2 (modulo 2^128).  The result is built in a
 * temporary so that sum may alias either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *	hi1 * hi2 << 64 +
 *	hi1 * lo2 << 32 +
 *	hi2 * lo1 << 32 +
 *	lo1 * lo2
 */
static void
dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dt_shift_128(tmp, 32);
	dt_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dt_shift_128(tmp, 32);
	dt_add_128(product, tmp, product);
}
/*
 * Long-hand binary division of a 128-bit dividend by a 64-bit divisor.
 *
 * The divisor is aligned so that its highest set bit sits in bit 127, with
 * a one-bit marker aligned the same way.  On every step the aligned divisor
 * is subtracted from the running remainder whenever it fits, the marker bit
 * is recorded in the quotient, and both are moved right by one bit.  The
 * loop ends once the remainder is smaller than the divisor itself.
 * quotient may alias dividend; divisor must be non-zero.
 */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t bit[2] = { 1, 0 };
	uint64_t rem[2], den[2], sub[2];
	uint64_t d;
	int width = 0;

	assert(divisor != 0);

	den[0] = sub[0] = divisor;
	den[1] = sub[1] = 0;

	rem[0] = dividend[0];
	rem[1] = dividend[1];

	/* Number of significant bits in the divisor. */
	for (d = divisor; d > 0; d >>= 1)
		width++;

	dt_shift_128(sub, 128 - width);
	dt_shift_128(bit, 128 - width);

	for (; dt_ge_128(rem, den);
	    dt_shift_128(sub, -1), dt_shift_128(bit, -1)) {
		if (!dt_ge_128(rem, sub))
			continue;

		dt_subtract_128(rem, sub, rem);
		quot[0] |= bit[0];
		quot[1] |= bit[1];
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
/*
 * Integer square root of a 128-bit value by the long-hand (digit-by-digit)
 * method in base 2: bit pairs are brought down from the most significant
 * end, and for the root R found so far the next root bit is 1 exactly when
 * (R << 2) + 1 fits in the running remainder.  Returns the low 64 bits of
 * the root; the high word is asserted to be zero.
 */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	uint64_t root[2] = { 0, 0 };
	uint64_t rem[2] = { 0, 0 };
	uint64_t one[2] = { 1, 0 };
	uint64_t pair[2], trial[2];
	int npairs = dt_nbits_128(square) / 2;
	int shift, fits;

	for (shift = 2 * npairs; shift >= 0; shift -= 2) {
		/* Bring down the next pair of bits. */
		pair[0] = square[0];
		pair[1] = square[1];
		dt_shift_128(pair, -shift);
		pair[0] &= 0x3;
		pair[1] = 0;

		dt_shift_128(rem, 2);
		dt_add_128(rem, pair, rem);

		/* trial = R << 2 + 1 */
		trial[0] = root[0];
		trial[1] = root[1];
		dt_shift_128(trial, 2);
		dt_add_128(trial, one, trial);

		/* Decide on the new bit before the root itself is shifted. */
		fits = dt_le_128(trial, rem);
		dt_shift_128(root, 1);

		if (fits) {
			dt_subtract_128(rem, trial, rem);
			dt_add_128(root, one, root);
		}
	}

	assert(root[1] == 0);

	return (root[0]);
}
+ + if (entlen == 0) { + assert(retlen == 0); + entlen = strlen(ent); + retlen = strlen(ret); + } + + /* + * If the name of the probe is "entry" or ends with "-entry", we + * treat it as an entry; if it is "return" or ends with "-return", + * we treat it as a return. (This allows application-provided probes + * like "method-entry" or "function-entry" to participate in flow + * indentation -- without accidentally misinterpreting popular probe + * names like "carpentry", "gentry" or "Coventry".) + */ + if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' && + (sub == n || sub[-1] == '-')) { + flow = DTRACEFLOW_ENTRY; + str = e_str[strcmp(p, "syscall") == 0]; + } else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' && + (sub == n || sub[-1] == '-')) { + flow = DTRACEFLOW_RETURN; + str = r_str[strcmp(p, "syscall") == 0]; + } + + /* + * If we're going to indent this, we need to check the ID of our last + * call. If we're looking at the same probe ID but a different EPID, + * we _don't_ want to indent. (Yes, there are some minor holes in + * this scheme -- it's a heuristic.) + */ + if (flow == DTRACEFLOW_ENTRY) { + if ((last != DTRACE_EPIDNONE && id != last && + pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id)) + flow = DTRACEFLOW_NONE; + } + + /* + * If we're going to unindent this, it's more difficult to see if + * we don't actually want to unindent it -- we need to look at the + * _next_ EPID. 
+ */ + if (flow == DTRACEFLOW_RETURN) { + offs += epd->dtepd_size; + + do { + if (offs >= buf->dtbd_size) + goto out; + + next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs); + + if (next == DTRACE_EPIDNONE) + offs += sizeof (id); + } while (next == DTRACE_EPIDNONE); + + if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0) + return (rval); + + if (next != id && npd->dtpd_id == pd->dtpd_id) + flow = DTRACEFLOW_NONE; + } + +out: + if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) { + data->dtpda_prefix = str; + } else { + data->dtpda_prefix = "| "; + } + + if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0) + data->dtpda_indent -= 2; + + data->dtpda_flow = flow; + + return (0); +} + +static int +dt_nullprobe() +{ + return (DTRACE_CONSUME_THIS); +} + +static int +dt_nullrec() +{ + return (DTRACE_CONSUME_NEXT); +} + +static void +dt_quantize_total(dtrace_hdl_t *dtp, int64_t datum, long double *total) +{ + long double val = dt_fabsl((long double)datum); + + if (dtp->dt_options[DTRACEOPT_AGGZOOM] == DTRACEOPT_UNSET) { + *total += val; + return; + } + + /* + * If we're zooming in on an aggregation, we want the height of the + * highest value to be approximately 95% of total bar height -- so we + * adjust up by the reciprocal of DTRACE_AGGZOOM_MAX when comparing to + * our highest value. + */ + val *= 1 / DTRACE_AGGZOOM_MAX; + + if (*total < val) + *total = val; +} + +static int +dt_print_quanthdr(dtrace_hdl_t *dtp, FILE *fp, int width) +{ + return (dt_printf(dtp, fp, "\n%*s %41s %-9s\n", + width ? width : 16, width ? 
"key" : "value", + "------------- Distribution -------------", "count")); +} + +static int +dt_print_quanthdr_packed(dtrace_hdl_t *dtp, FILE *fp, int width, + const dtrace_aggdata_t *aggdata, dtrace_actkind_t action) +{ + int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin; + int minwidth, maxwidth, i; + + assert(action == DTRACEAGG_QUANTIZE || action == DTRACEAGG_LQUANTIZE); + + if (action == DTRACEAGG_QUANTIZE) { + if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET) + min--; + + if (max < DTRACE_QUANTIZE_NBUCKETS - 1) + max++; + + minwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(min)); + maxwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(max)); + } else { + maxwidth = 8; + minwidth = maxwidth - 1; + max++; + } + + if (dt_printf(dtp, fp, "\n%*s %*s .", + width, width > 0 ? "key" : "", minwidth, "min") < 0) + return (-1); + + for (i = min; i <= max; i++) { + if (dt_printf(dtp, fp, "-") < 0) + return (-1); + } + + return (dt_printf(dtp, fp, ". %*s | count\n", -maxwidth, "max")); +} + +/* + * We use a subset of the Unicode Block Elements (U+2588 through U+258F, + * inclusive) to represent aggregations via UTF-8 -- which are expressed via + * 3-byte UTF-8 sequences. 
+ */ +#define DTRACE_AGGUTF8_FULL 0x2588 +#define DTRACE_AGGUTF8_BASE 0x258f +#define DTRACE_AGGUTF8_LEVELS 8 + +#define DTRACE_AGGUTF8_BYTE0(val) (0xe0 | ((val) >> 12)) +#define DTRACE_AGGUTF8_BYTE1(val) (0x80 | (((val) >> 6) & 0x3f)) +#define DTRACE_AGGUTF8_BYTE2(val) (0x80 | ((val) & 0x3f)) + +static int +dt_print_quantline_utf8(dtrace_hdl_t *dtp, FILE *fp, int64_t val, + uint64_t normal, long double total) +{ + uint_t len = 40, i, whole, partial; + long double f = (dt_fabsl((long double)val) * len) / total; + const char *spaces = " "; + + whole = (uint_t)f; + partial = (uint_t)((f - (long double)(uint_t)f) * + (long double)DTRACE_AGGUTF8_LEVELS); + + if (dt_printf(dtp, fp, "|") < 0) + return (-1); + + for (i = 0; i < whole; i++) { + if (dt_printf(dtp, fp, "%c%c%c", + DTRACE_AGGUTF8_BYTE0(DTRACE_AGGUTF8_FULL), + DTRACE_AGGUTF8_BYTE1(DTRACE_AGGUTF8_FULL), + DTRACE_AGGUTF8_BYTE2(DTRACE_AGGUTF8_FULL)) < 0) + return (-1); + } + + if (partial != 0) { + partial = DTRACE_AGGUTF8_BASE - (partial - 1); + + if (dt_printf(dtp, fp, "%c%c%c", + DTRACE_AGGUTF8_BYTE0(partial), + DTRACE_AGGUTF8_BYTE1(partial), + DTRACE_AGGUTF8_BYTE2(partial)) < 0) + return (-1); + + i++; + } + + return (dt_printf(dtp, fp, "%s %-9lld\n", spaces + i, + (long long)val / normal)); +} + +static int +dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val, + uint64_t normal, long double total, char positives, char negatives) +{ + long double f; + uint_t depth, len = 40; + + const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"; + const char *spaces = " "; + + assert(strlen(ats) == len && strlen(spaces) == len); + assert(!(total == 0 && (positives || negatives))); + assert(!(val < 0 && !negatives)); + assert(!(val > 0 && !positives)); + assert(!(val != 0 && total == 0)); + + if (!negatives) { + if (positives) { + if (dtp->dt_encoding == DT_ENCODING_UTF8) { + return (dt_print_quantline_utf8(dtp, fp, val, + normal, total)); + } + + f = (dt_fabsl((long double)val) * len) / total; + depth = 
(uint_t)(f + 0.5); + } else { + depth = 0; + } + + return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth, + spaces + depth, (long long)val / normal)); + } + + if (!positives) { + f = (dt_fabsl((long double)val) * len) / total; + depth = (uint_t)(f + 0.5); + + return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth, + ats + len - depth, (long long)val / normal)); + } + + /* + * If we're here, we have both positive and negative bucket values. + * To express this graphically, we're going to generate both positive + * and negative bars separated by a centerline. These bars are half + * the size of normal quantize()/lquantize() bars, so we divide the + * length in half before calculating the bar length. + */ + len /= 2; + ats = &ats[len]; + spaces = &spaces[len]; + + f = (dt_fabsl((long double)val) * len) / total; + depth = (uint_t)(f + 0.5); + + if (val <= 0) { + return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth, + ats + len - depth, len, "", (long long)val / normal)); + } else { + return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "", + ats + len - depth, spaces + depth, + (long long)val / normal)); + } +} + +/* + * As with UTF-8 printing of aggregations, we use a subset of the Unicode + * Block Elements (U+2581 through U+2588, inclusive) to represent our packed + * aggregation. + */ +#define DTRACE_AGGPACK_BASE 0x2581 +#define DTRACE_AGGPACK_LEVELS 8 + +static int +dt_print_packed(dtrace_hdl_t *dtp, FILE *fp, + long double datum, long double total) +{ + static boolean_t utf8_checked = B_FALSE; + static boolean_t utf8; + char *ascii = "__xxxxXX"; + char *neg = "vvvvVV"; + unsigned int len; + long double val; + + if (!utf8_checked) { + char *term; + + /* + * We want to determine if we can reasonably emit UTF-8 for our + * packed aggregation. To do this, we will check for terminals + * that are known to be primitive to emit UTF-8 on these. 
+ */ + utf8_checked = B_TRUE; + + /* + * An explicit encoding setting is honored before TERM is + * consulted. All three strcmp() calls are grouped under the + * NULL check so that an unset TERM selects UTF-8 instead of + * passing NULL to strcmp(). + */ + if (dtp->dt_encoding == DT_ENCODING_ASCII) { + utf8 = B_FALSE; + } else if (dtp->dt_encoding == DT_ENCODING_UTF8) { + utf8 = B_TRUE; + } else if ((term = getenv("TERM")) != NULL && + (strcmp(term, "sun") == 0 || + strcmp(term, "sun-color") == 0 || + strcmp(term, "dumb") == 0)) { + utf8 = B_FALSE; + } else { + utf8 = B_TRUE; + } + } + + /* A zero datum is rendered as a single blank cell. */ + if (datum == 0) + return (dt_printf(dtp, fp, " ")); + + /* + * Negative data always use the ASCII "neg" ramp; the index is the + * magnitude scaled against total and rounded to the nearest level. + */ + if (datum < 0) { + len = strlen(neg); + val = dt_fabsl(datum * (len - 1)) / total; + return (dt_printf(dtp, fp, "%c", neg[(uint_t)(val + 0.5)])); + } + + if (utf8) { + int block = DTRACE_AGGPACK_BASE + (unsigned int)(((datum * + (DTRACE_AGGPACK_LEVELS - 1)) / total) + 0.5); + + return (dt_printf(dtp, fp, "%c%c%c", + DTRACE_AGGUTF8_BYTE0(block), + DTRACE_AGGUTF8_BYTE1(block), + DTRACE_AGGUTF8_BYTE2(block))); + } + + len = strlen(ascii); + val = (datum * (len - 1)) / total; + return (dt_printf(dtp, fp, "%c", ascii[(uint_t)(val + 0.5)])); +} + +int +dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr, + size_t size, uint64_t normal) +{ + const int64_t *data = addr; + int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1; + long double total = 0; + char positives = 0, negatives = 0; + + if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0) + first_bin++; + + if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) { + /* + * There isn't any data. This is possible if the aggregation + * has been clear()'d or if negative increment values have been + * used. Regardless, we'll print the buckets around 0. 
+ */ + first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1; + last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1; + } else { + if (first_bin > 0) + first_bin--; + + while (last_bin > 0 && data[last_bin] == 0) + last_bin--; + + if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1) + last_bin++; + } + + for (i = first_bin; i <= last_bin; i++) { + positives |= (data[i] > 0); + negatives |= (data[i] < 0); + dt_quantize_total(dtp, data[i], &total); + } + + if (dt_print_quanthdr(dtp, fp, 0) < 0) + return (-1); + + for (i = first_bin; i <= last_bin; i++) { + if (dt_printf(dtp, fp, "%16lld ", + (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0) + return (-1); + + if (dt_print_quantline(dtp, fp, data[i], normal, total, + positives, negatives) < 0) + return (-1); + } + + return (0); +} + +int +dt_print_quantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr, + size_t size, const dtrace_aggdata_t *aggdata) +{ + const int64_t *data = addr; + long double total = 0, count = 0; + int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin, i; + int64_t minval, maxval; + + if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET) + min--; + + if (max < DTRACE_QUANTIZE_NBUCKETS - 1) + max++; + + minval = DTRACE_QUANTIZE_BUCKETVAL(min); + maxval = DTRACE_QUANTIZE_BUCKETVAL(max); + + if (dt_printf(dtp, fp, " %*lld :", dt_ndigits(minval), + (long long)minval) < 0) + return (-1); + + for (i = min; i <= max; i++) { + dt_quantize_total(dtp, data[i], &total); + count += data[i]; + } + + for (i = min; i <= max; i++) { + if (dt_print_packed(dtp, fp, data[i], total) < 0) + return (-1); + } + + if (dt_printf(dtp, fp, ": %*lld | %lld\n", + -dt_ndigits(maxval), (long long)maxval, (long long)count) < 0) + return (-1); + + return (0); +} + +int +dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr, + size_t size, uint64_t normal) +{ + const int64_t *data = addr; + int i, first_bin, last_bin, base; + uint64_t 
arg; + long double total = 0; + uint16_t step, levels; + char positives = 0, negatives = 0; + + if (size < sizeof (uint64_t)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + arg = *data++; + size -= sizeof (uint64_t); + + base = DTRACE_LQUANTIZE_BASE(arg); + step = DTRACE_LQUANTIZE_STEP(arg); + levels = DTRACE_LQUANTIZE_LEVELS(arg); + + first_bin = 0; + last_bin = levels + 1; + + if (size != sizeof (uint64_t) * (levels + 2)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + while (first_bin <= levels + 1 && data[first_bin] == 0) + first_bin++; + + if (first_bin > levels + 1) { + first_bin = 0; + last_bin = 2; + } else { + if (first_bin > 0) + first_bin--; + + while (last_bin > 0 && data[last_bin] == 0) + last_bin--; + + if (last_bin < levels + 1) + last_bin++; + } + + for (i = first_bin; i <= last_bin; i++) { + positives |= (data[i] > 0); + negatives |= (data[i] < 0); + dt_quantize_total(dtp, data[i], &total); + } + + if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value", + "------------- Distribution -------------", "count") < 0) + return (-1); + + for (i = first_bin; i <= last_bin; i++) { + char c[32]; + int err; + + if (i == 0) { + (void) snprintf(c, sizeof (c), "< %d", base); + err = dt_printf(dtp, fp, "%16s ", c); + } else if (i == levels + 1) { + (void) snprintf(c, sizeof (c), ">= %d", + base + (levels * step)); + err = dt_printf(dtp, fp, "%16s ", c); + } else { + err = dt_printf(dtp, fp, "%16d ", + base + (i - 1) * step); + } + + if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal, + total, positives, negatives) < 0) + return (-1); + } + + return (0); +} + +/*ARGSUSED*/ +int +dt_print_lquantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr, + size_t size, const dtrace_aggdata_t *aggdata) +{ + const int64_t *data = addr; + long double total = 0, count = 0; + int min, max, base, err; + uint64_t arg; + uint16_t step, levels; + char c[32]; + unsigned int i; + + if (size < sizeof (uint64_t)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + arg = 
*data++; + size -= sizeof (uint64_t); + + base = DTRACE_LQUANTIZE_BASE(arg); + step = DTRACE_LQUANTIZE_STEP(arg); + levels = DTRACE_LQUANTIZE_LEVELS(arg); + + if (size != sizeof (uint64_t) * (levels + 2)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + min = 0; + max = levels + 1; + + if (min == 0) { + (void) snprintf(c, sizeof (c), "< %d", base); + err = dt_printf(dtp, fp, "%8s :", c); + } else { + err = dt_printf(dtp, fp, "%8d :", base + (min - 1) * step); + } + + if (err < 0) + return (-1); + + for (i = min; i <= max; i++) { + dt_quantize_total(dtp, data[i], &total); + count += data[i]; + } + + for (i = min; i <= max; i++) { + if (dt_print_packed(dtp, fp, data[i], total) < 0) + return (-1); + } + + (void) snprintf(c, sizeof (c), ">= %d", base + (levels * step)); + return (dt_printf(dtp, fp, ": %-8s | %lld\n", c, (long long)count)); +} + +int +dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr, + size_t size, uint64_t normal) +{ + int i, first_bin, last_bin, bin = 1, order, levels; + uint16_t factor, low, high, nsteps; + const int64_t *data = addr; + int64_t value = 1, next, step; + char positives = 0, negatives = 0; + long double total = 0; + uint64_t arg; + char c[32]; + + if (size < sizeof (uint64_t)) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + arg = *data++; + size -= sizeof (uint64_t); + + factor = DTRACE_LLQUANTIZE_FACTOR(arg); + low = DTRACE_LLQUANTIZE_LOW(arg); + high = DTRACE_LLQUANTIZE_HIGH(arg); + nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + + /* + * We don't expect to be handed invalid llquantize() parameters here, + * but sanity check them (to a degree) nonetheless. 
+ */ + if (size > INT32_MAX || factor < 2 || low >= high || + nsteps == 0 || factor > nsteps) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + levels = (int)size / sizeof (uint64_t); + + first_bin = 0; + last_bin = levels - 1; + + while (first_bin < levels && data[first_bin] == 0) + first_bin++; + + if (first_bin == levels) { + first_bin = 0; + last_bin = 1; + } else { + if (first_bin > 0) + first_bin--; + + while (last_bin > 0 && data[last_bin] == 0) + last_bin--; + + if (last_bin < levels - 1) + last_bin++; + } + + for (i = first_bin; i <= last_bin; i++) { + positives |= (data[i] > 0); + negatives |= (data[i] < 0); + dt_quantize_total(dtp, data[i], &total); + } + + if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value", + "------------- Distribution -------------", "count") < 0) + return (-1); + + for (order = 0; order < low; order++) + value *= factor; + + next = value * factor; + step = next > nsteps ? next / nsteps : 1; + + if (first_bin == 0) { + (void) snprintf(c, sizeof (c), "< %lld", (long long)value); + + if (dt_printf(dtp, fp, "%16s ", c) < 0) + return (-1); + + if (dt_print_quantline(dtp, fp, data[0], normal, + total, positives, negatives) < 0) + return (-1); + } + + while (order <= high) { + if (bin >= first_bin && bin <= last_bin) { + if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0) + return (-1); + + if (dt_print_quantline(dtp, fp, data[bin], + normal, total, positives, negatives) < 0) + return (-1); + } + + assert(value < next); + bin++; + + if ((value += step) != next) + continue; + + next = value * factor; + step = next > nsteps ? 
next / nsteps : 1; + order++; + } + + if (last_bin < bin) + return (0); + + assert(last_bin == bin); + (void) snprintf(c, sizeof (c), ">= %lld", (long long)value); + + if (dt_printf(dtp, fp, "%16s ", c) < 0) + return (-1); + + return (dt_print_quantline(dtp, fp, data[bin], normal, + total, positives, negatives)); +} + +/*ARGSUSED*/ +static int +dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, + size_t size, uint64_t normal) +{ + /* LINTED - alignment */ + int64_t *data = (int64_t *)addr; + + return (dt_printf(dtp, fp, " %16lld", data[0] ? + (long long)(data[1] / (int64_t)normal / data[0]) : 0)); +} + +/*ARGSUSED*/ +static int +dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, + size_t size, uint64_t normal) +{ + /* LINTED - alignment */ + uint64_t *data = (uint64_t *)addr; + + return (dt_printf(dtp, fp, " %16llu", data[0] ? + (unsigned long long) dt_stddev(data, normal) : 0)); +} + +/*ARGSUSED*/ +static int +dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, + size_t nbytes, int width, int quiet, int forceraw) +{ + /* + * If the byte stream is a series of printable characters, followed by + * a terminating byte, we print it out as a string. Otherwise, we + * assume that it's something else and just print the bytes. + */ + int i, j, margin = 5; + char *c = (char *)addr; + + if (nbytes == 0) + return (0); + + if (forceraw) + goto raw; + + if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET) + goto raw; + + for (i = 0; i < nbytes; i++) { + /* + * We define a "printable character" to be one for which + * isprint(3C) returns non-zero, isspace(3C) returns non-zero, + * or a character which is either backspace or the bell. + * Backspace and the bell are regrettably special because + * they fail the first two tests -- and yet they are entirely + * printable. These are the only two control characters that + * have meaning for the terminal and for which isprint(3C) and + * isspace(3C) return 0. 
+ */ + if (isprint(c[i]) || isspace(c[i]) || + c[i] == '\b' || c[i] == '\a') + continue; + + if (c[i] == '\0' && i > 0) { + /* + * This looks like it might be a string. Before we + * assume that it is indeed a string, check the + * remainder of the byte range; if it contains + * additional non-nul characters, we'll assume that + * it's a binary stream that just happens to look like + * a string, and we'll print out the individual bytes. + */ + for (j = i + 1; j < nbytes; j++) { + if (c[j] != '\0') + break; + } + + if (j != nbytes) + break; + + if (quiet) { + return (dt_printf(dtp, fp, "%s", c)); + } else { + return (dt_printf(dtp, fp, " %s%*s", + width < 0 ? " " : "", width, c)); + } + } + + break; + } + + if (i == nbytes) { + /* + * The byte range is all printable characters, but there is + * no trailing nul byte. We'll assume that it's a string and + * print it as such. + */ + char *s = alloca(nbytes + 1); + bcopy(c, s, nbytes); + s[nbytes] = '\0'; + return (dt_printf(dtp, fp, " %-*s", width, s)); + } + +raw: + if (dt_printf(dtp, fp, "\n%*s ", margin, "") < 0) + return (-1); + + for (i = 0; i < 16; i++) + if (dt_printf(dtp, fp, " %c", "0123456789abcdef"[i]) < 0) + return (-1); + + if (dt_printf(dtp, fp, " 0123456789abcdef\n") < 0) + return (-1); + + + for (i = 0; i < nbytes; i += 16) { + if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0) + return (-1); + + for (j = i; j < i + 16 && j < nbytes; j++) { + if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0) + return (-1); + } + + while (j++ % 16) { + if (dt_printf(dtp, fp, " ") < 0) + return (-1); + } + + if (dt_printf(dtp, fp, " ") < 0) + return (-1); + + for (j = i; j < i + 16 && j < nbytes; j++) { + if (dt_printf(dtp, fp, "%c", + c[j] < ' ' || c[j] > '~' ? '.' 
: c[j]) < 0) + return (-1); + } + + if (dt_printf(dtp, fp, "\n") < 0) + return (-1); + } + + return (0); +} + +int +dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format, + caddr_t addr, int depth, int size) +{ + dtrace_syminfo_t dts; + GElf_Sym sym; + int i, indent; + char c[PATH_MAX * 2]; + uint64_t pc; + + if (dt_printf(dtp, fp, "\n") < 0) + return (-1); + + if (format == NULL) + format = "%s"; + + if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET) + indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT]; + else + indent = _dtrace_stkindent; + + for (i = 0; i < depth; i++) { + switch (size) { + case sizeof (uint32_t): + /* LINTED - alignment */ + pc = *((uint32_t *)addr); + break; + + case sizeof (uint64_t): + /* LINTED - alignment */ + pc = *((uint64_t *)addr); + break; + + default: + return (dt_set_errno(dtp, EDT_BADSTACKPC)); + } + + if (pc == 0) + break; + + addr += size; + + if (dt_printf(dtp, fp, "%*s", indent, "") < 0) + return (-1); + + if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) { + if (pc > sym.st_value) { + (void) snprintf(c, sizeof (c), "%s`%s+0x%llx", + dts.dts_object, dts.dts_name, + (u_longlong_t)(pc - sym.st_value)); + } else { + (void) snprintf(c, sizeof (c), "%s`%s", + dts.dts_object, dts.dts_name); + } + } else { + /* + * We'll repeat the lookup, but this time we'll specify + * a NULL GElf_Sym -- indicating that we're only + * interested in the containing module. 
+ */ + if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) { + (void) snprintf(c, sizeof (c), "%s`0x%llx", + dts.dts_object, (u_longlong_t)pc); + } else { + (void) snprintf(c, sizeof (c), "0x%llx", + (u_longlong_t)pc); + } + } + + if (dt_printf(dtp, fp, format, c) < 0) + return (-1); + + if (dt_printf(dtp, fp, "\n") < 0) + return (-1); + } + + return (0); +} + +int +dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format, + caddr_t addr, uint64_t arg) +{ + /* LINTED - alignment */ + uint64_t *pc = (uint64_t *)addr; + uint32_t depth = DTRACE_USTACK_NFRAMES(arg); + uint32_t strsize = DTRACE_USTACK_STRSIZE(arg); + const char *strbase = addr + (depth + 1) * sizeof (uint64_t); + const char *str = strsize ? strbase : NULL; + int err = 0; + + char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2]; + struct ps_prochandle *P; + GElf_Sym sym; + int i, indent; + pid_t pid; + + if (depth == 0) + return (0); + + pid = (pid_t)*pc++; + + if (dt_printf(dtp, fp, "\n") < 0) + return (-1); + + if (format == NULL) + format = "%s"; + + if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET) + indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT]; + else + indent = _dtrace_stkindent; + + /* + * Ultimately, we need to add an entry point in the library vector for + * determining <symbol, offset> from <pid, address>. For now, if + * this is a vector open, we just print the raw address or string. 
+ */ + if (dtp->dt_vector == NULL) + P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0); + else + P = NULL; + + if (P != NULL) + dt_proc_lock(dtp, P); /* lock handle while we perform lookups */ + + for (i = 0; i < depth && pc[i] != 0; i++) { + const prmap_t *map; + + if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0) + break; + + if (P != NULL && Plookup_by_addr(P, pc[i], + name, sizeof (name), &sym) == 0) { + (void) Pobjname(P, pc[i], objname, sizeof (objname)); + + if (pc[i] > sym.st_value) { + (void) snprintf(c, sizeof (c), + "%s`%s+0x%llx", dt_basename(objname), name, + (u_longlong_t)(pc[i] - sym.st_value)); + } else { + (void) snprintf(c, sizeof (c), + "%s`%s", dt_basename(objname), name); + } + } else if (str != NULL && str[0] != '\0' && str[0] != '@' && + (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL || + (map->pr_mflags & MA_WRITE)))) { + /* + * If the current string pointer in the string table + * does not point to an empty string _and_ the program + * counter falls in a writable region, we'll use the + * string from the string table instead of the raw + * address. This last condition is necessary because + * some (broken) ustack helpers will return a string + * even for a program counter that they can't + * identify. If we have a string for a program + * counter that falls in a segment that isn't + * writable, we assume that we have fallen into this + * case and we refuse to use the string. 
+ */ + (void) snprintf(c, sizeof (c), "%s", str); + } else { + if (P != NULL && Pobjname(P, pc[i], objname, + sizeof (objname)) != 0) { + (void) snprintf(c, sizeof (c), "%s`0x%llx", + dt_basename(objname), (u_longlong_t)pc[i]); + } else { + (void) snprintf(c, sizeof (c), "0x%llx", + (u_longlong_t)pc[i]); + } + } + + if ((err = dt_printf(dtp, fp, format, c)) < 0) + break; + + if ((err = dt_printf(dtp, fp, "\n")) < 0) + break; + + if (str != NULL && str[0] == '@') { + /* + * If the first character of the string is an "at" sign, + * then the string is inferred to be an annotation -- + * and it is printed out beneath the frame and offset + * with brackets. + */ + if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0) + break; + + (void) snprintf(c, sizeof (c), " [ %s ]", &str[1]); + + if ((err = dt_printf(dtp, fp, format, c)) < 0) + break; + + if ((err = dt_printf(dtp, fp, "\n")) < 0) + break; + } + + if (str != NULL) { + str += strlen(str) + 1; + if (str - strbase >= strsize) + str = NULL; + } + } + + if (P != NULL) { + dt_proc_unlock(dtp, P); + dt_proc_release(dtp, P); + } + + return (err); +} + +static int +dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act) +{ + /* LINTED - alignment */ + uint64_t pid = ((uint64_t *)addr)[0]; + /* LINTED - alignment */ + uint64_t pc = ((uint64_t *)addr)[1]; + const char *format = " %-50s"; + char *s; + int n, len = 256; + + if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) { + struct ps_prochandle *P; + + if ((P = dt_proc_grab(dtp, pid, + PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) { + GElf_Sym sym; + + dt_proc_lock(dtp, P); + + if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0) + pc = sym.st_value; + + dt_proc_unlock(dtp, P); + dt_proc_release(dtp, P); + } + } + + do { + n = len; + s = alloca(n); + } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n); + + return (dt_printf(dtp, fp, format, s)); +} + +int +dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr) +{ + /* LINTED - 
alignment */ + uint64_t pid = ((uint64_t *)addr)[0]; + /* LINTED - alignment */ + uint64_t pc = ((uint64_t *)addr)[1]; + int err = 0; + + char objname[PATH_MAX], c[PATH_MAX * 2]; + struct ps_prochandle *P; + + if (format == NULL) + format = " %-50s"; + + /* + * See the comment in dt_print_ustack() for the rationale for + * printing raw addresses in the vectored case. + */ + if (dtp->dt_vector == NULL) + P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0); + else + P = NULL; + + if (P != NULL) + dt_proc_lock(dtp, P); /* lock handle while we perform lookups */ + + if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) { + (void) snprintf(c, sizeof (c), "%s", dt_basename(objname)); + } else { + (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc); + } + + err = dt_printf(dtp, fp, format, c); + + if (P != NULL) { + dt_proc_unlock(dtp, P); + dt_proc_release(dtp, P); + } + + return (err); +} + +int +dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr) +{ + int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET); + size_t nbytes = *((uintptr_t *) addr); + + return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t), + nbytes, 50, quiet, 1)); +} + +typedef struct dt_type_cbdata { + dtrace_hdl_t *dtp; + dtrace_typeinfo_t dtt; + caddr_t addr; + caddr_t addrend; + const char *name; + int f_type; + int indent; + int type_width; + int name_width; + FILE *fp; +} dt_type_cbdata_t; + +static int dt_print_type_data(dt_type_cbdata_t *, ctf_id_t); + +static int +dt_print_type_member(const char *name, ctf_id_t type, ulong_t off, void *arg) +{ + dt_type_cbdata_t cbdata; + dt_type_cbdata_t *cbdatap = arg; + ssize_t ssz; + + if ((ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type)) <= 0) + return (0); + + off /= 8; + + cbdata = *cbdatap; + cbdata.name = name; + cbdata.addr += off; + cbdata.addrend = cbdata.addr + ssz; + + return (dt_print_type_data(&cbdata, type)); +} + +static int +dt_print_type_width(const char *name, ctf_id_t type, ulong_t off, void 
*arg) +{ + char buf[DT_TYPE_NAMELEN]; + char *p; + dt_type_cbdata_t *cbdatap = arg; + size_t sz = strlen(name); + + ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf)); + + if ((p = strchr(buf, '[')) != NULL) + p[-1] = '\0'; + else + p = ""; + + sz += strlen(p); + + if (sz > cbdatap->name_width) + cbdatap->name_width = sz; + + sz = strlen(buf); + + if (sz > cbdatap->type_width) + cbdatap->type_width = sz; + + return (0); +} + +static int +dt_print_type_data(dt_type_cbdata_t *cbdatap, ctf_id_t type) +{ + caddr_t addr = cbdatap->addr; + caddr_t addrend = cbdatap->addrend; + char buf[DT_TYPE_NAMELEN]; + char *p; + int cnt = 0; + uint_t kind = ctf_type_kind(cbdatap->dtt.dtt_ctfp, type); + ssize_t ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type); + + ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf)); + + if ((p = strchr(buf, '[')) != NULL) + p[-1] = '\0'; + else + p = ""; + + if (cbdatap->f_type) { + int type_width = roundup(cbdatap->type_width + 1, 4); + int name_width = roundup(cbdatap->name_width + 1, 4); + + name_width -= strlen(cbdatap->name); + + dt_printf(cbdatap->dtp, cbdatap->fp, "%*s%-*s%s%-*s = ",cbdatap->indent * 4,"",type_width,buf,cbdatap->name,name_width,p); + } + + while (addr < addrend) { + dt_type_cbdata_t cbdata; + ctf_arinfo_t arinfo; + ctf_encoding_t cte; + uintptr_t *up; + void *vp = addr; + cbdata = *cbdatap; + cbdata.name = ""; + cbdata.addr = addr; + cbdata.addrend = addr + ssz; + cbdata.f_type = 0; + cbdata.indent++; + cbdata.type_width = 0; + cbdata.name_width = 0; + + if (cnt > 0) + dt_printf(cbdatap->dtp, cbdatap->fp, "%*s", cbdatap->indent * 4,""); + + switch (kind) { + case CTF_K_INTEGER: + if (ctf_type_encoding(cbdatap->dtt.dtt_ctfp, type, &cte) != 0) + return (-1); + if ((cte.cte_format & CTF_INT_SIGNED) != 0) + switch (cte.cte_bits) { + case 8: + if (isprint(*((char *) vp))) + dt_printf(cbdatap->dtp, cbdatap->fp, "'%c', ", *((char *) vp)); + dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((char *) vp), 
*((char *) vp)); + break; + case 16: + dt_printf(cbdatap->dtp, cbdatap->fp, "%hd (0x%hx);\n", *((short *) vp), *((u_short *) vp)); + break; + case 32: + dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((int *) vp), *((u_int *) vp)); + break; + case 64: + dt_printf(cbdatap->dtp, cbdatap->fp, "%jd (0x%jx);\n", *((long long *) vp), *((unsigned long long *) vp)); + break; + default: + dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits); + break; + } + else + switch (cte.cte_bits) { + case 8: + dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((uint8_t *) vp) & 0xff, *((uint8_t *) vp) & 0xff); + break; + case 16: + dt_printf(cbdatap->dtp, cbdatap->fp, "%hu (0x%hx);\n", *((u_short *) vp), *((u_short *) vp)); + break; + case 32: + dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((u_int *) vp), *((u_int *) vp)); + break; + case 64: + dt_printf(cbdatap->dtp, cbdatap->fp, "%ju (0x%jx);\n", *((unsigned long long *) vp), *((unsigned long long *) vp)); + break; + default: + dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits); + break; + } + break; + case CTF_K_FLOAT: + dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FLOAT: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits); + break; + case CTF_K_POINTER: + dt_printf(cbdatap->dtp, cbdatap->fp, "%p;\n", *((void **) addr)); + break; + case CTF_K_ARRAY: + if (ctf_array_info(cbdatap->dtt.dtt_ctfp, type, &arinfo) != 0) + return (-1); + dt_printf(cbdatap->dtp, cbdatap->fp, "{\n%*s",cbdata.indent * 4,""); + dt_print_type_data(&cbdata, arinfo.ctr_contents); + dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,""); + break; + case CTF_K_FUNCTION: + dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FUNCTION:\n"); + break; + case CTF_K_STRUCT: + cbdata.f_type = 1; + if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type, + dt_print_type_width, 
&cbdata) != 0) + return (-1); + dt_printf(cbdatap->dtp, cbdatap->fp, "{\n"); + if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type, + dt_print_type_member, &cbdata) != 0) + return (-1); + dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,""); + break; + case CTF_K_UNION: + cbdata.f_type = 1; + if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type, + dt_print_type_width, &cbdata) != 0) + return (-1); + dt_printf(cbdatap->dtp, cbdatap->fp, "{\n"); + if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type, + dt_print_type_member, &cbdata) != 0) + return (-1); + dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,""); + break; + case CTF_K_ENUM: + dt_printf(cbdatap->dtp, cbdatap->fp, "%s;\n", ctf_enum_name(cbdatap->dtt.dtt_ctfp, type, *((int *) vp))); + break; + case CTF_K_TYPEDEF: + dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type)); + break; + case CTF_K_VOLATILE: + if (cbdatap->f_type) + dt_printf(cbdatap->dtp, cbdatap->fp, "volatile "); + dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type)); + break; + case CTF_K_CONST: + if (cbdatap->f_type) + dt_printf(cbdatap->dtp, cbdatap->fp, "const "); + dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type)); + break; + case CTF_K_RESTRICT: + if (cbdatap->f_type) + dt_printf(cbdatap->dtp, cbdatap->fp, "restrict "); + dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type)); + break; + default: + break; + } + + addr += ssz; + cnt++; + } + + return (0); +} + +static int +dt_print_type(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr) +{ + caddr_t addrend; + char *p; + dtrace_typeinfo_t dtt; + dt_type_cbdata_t cbdata; + int num = 0; + int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET); + ssize_t ssz; + + if (!quiet) + dt_printf(dtp, fp, "\n"); + + /* Get the total number of bytes of data buffered. 
*/ + size_t nbytes = *((uintptr_t *) addr); + addr += sizeof(uintptr_t); + + /* + * Get the size of the type so that we can check that it matches + * the CTF data we look up and so that we can figure out how many + * type elements are buffered. + */ + size_t typs = *((uintptr_t *) addr); + addr += sizeof(uintptr_t); + + /* + * Point to the type string in the buffer. Get its string + * length and round it up to become the offset to the start + * of the buffered type data which we would like to be aligned + * for easy access. + */ + char *strp = (char *) addr; + int offset = roundup(strlen(strp) + 1, sizeof(uintptr_t)); + + /* + * The type string might have a format such as 'int [20]'. + * Check if there is an array dimension present. + */ + if ((p = strchr(strp, '[')) != NULL) { + /* + * Strip off the array dimension. This overwrites the '[' + * in the buffered string itself, and then accumulates the + * characters up to ']' as a decimal count. + */ + *p++ = '\0'; + + for (; *p != '\0' && *p != ']'; p++) + num = num * 10 + *p - '0'; + } else + /* No array dimension, so default. */ + num = 1; + + /* Look up the CTF type from the type string. */ + if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_EVERY, strp, &dtt) < 0) + return (-1); + + /* Offset the buffer address to the start of the data... 
*/ + addr += offset; + + ssz = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type); + + if (typs != ssz) { + printf("Expected type size from buffer (%lu) to match type size looked up now (%ld)\n", (u_long) typs, (long) ssz); + return (-1); + } + + cbdata.dtp = dtp; + cbdata.dtt = dtt; + cbdata.name = ""; + cbdata.addr = addr; + cbdata.addrend = addr + nbytes; + cbdata.indent = 1; + cbdata.f_type = 1; + cbdata.type_width = 0; + cbdata.name_width = 0; + cbdata.fp = fp; + + return (dt_print_type_data(&cbdata, dtt.dtt_type)); +} + +static int +dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr) +{ + /* LINTED - alignment */ + uint64_t pc = *((uint64_t *)addr); + dtrace_syminfo_t dts; + GElf_Sym sym; + char c[PATH_MAX * 2]; + + if (format == NULL) + format = " %-50s"; + + if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) { + (void) snprintf(c, sizeof (c), "%s`%s", + dts.dts_object, dts.dts_name); + } else { + /* + * We'll repeat the lookup, but this time we'll specify a + * NULL GElf_Sym -- indicating that we're only interested in + * the containing module. 
+ */ + if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) { + (void) snprintf(c, sizeof (c), "%s`0x%llx", + dts.dts_object, (u_longlong_t)pc); + } else { + (void) snprintf(c, sizeof (c), "0x%llx", + (u_longlong_t)pc); + } + } + + if (dt_printf(dtp, fp, format, c) < 0) + return (-1); + + return (0); +} + +int +dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr) +{ + /* LINTED - alignment */ + uint64_t pc = *((uint64_t *)addr); + dtrace_syminfo_t dts; + char c[PATH_MAX * 2]; + + if (format == NULL) + format = " %-50s"; + + if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) { + (void) snprintf(c, sizeof (c), "%s", dts.dts_object); + } else { + (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc); + } + + if (dt_printf(dtp, fp, format, c) < 0) + return (-1); + + return (0); +} + +typedef struct dt_normal { + dtrace_aggvarid_t dtnd_id; + uint64_t dtnd_normal; +} dt_normal_t; + +static int +dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg) +{ + dt_normal_t *normal = arg; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_aggvarid_t id = normal->dtnd_id; + + if (agg->dtagd_nrecs == 0) + return (DTRACE_AGGWALK_NEXT); + + if (agg->dtagd_varid != id) + return (DTRACE_AGGWALK_NEXT); + + ((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal; + return (DTRACE_AGGWALK_NORMALIZE); +} + +static int +dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec) +{ + dt_normal_t normal; + caddr_t addr; + + /* + * We (should) have two records: the aggregation ID followed by the + * normalization value. 
+ */ + addr = base + rec->dtrd_offset; + + if (rec->dtrd_size != sizeof (dtrace_aggvarid_t)) + return (dt_set_errno(dtp, EDT_BADNORMAL)); + + /* LINTED - alignment */ + normal.dtnd_id = *((dtrace_aggvarid_t *)addr); + rec++; + + if (rec->dtrd_action != DTRACEACT_LIBACT) + return (dt_set_errno(dtp, EDT_BADNORMAL)); + + if (rec->dtrd_arg != DT_ACT_NORMALIZE) + return (dt_set_errno(dtp, EDT_BADNORMAL)); + + addr = base + rec->dtrd_offset; + + switch (rec->dtrd_size) { + case sizeof (uint64_t): + /* LINTED - alignment */ + normal.dtnd_normal = *((uint64_t *)addr); + break; + case sizeof (uint32_t): + /* LINTED - alignment */ + normal.dtnd_normal = *((uint32_t *)addr); + break; + case sizeof (uint16_t): + /* LINTED - alignment */ + normal.dtnd_normal = *((uint16_t *)addr); + break; + case sizeof (uint8_t): + normal.dtnd_normal = *((uint8_t *)addr); + break; + default: + return (dt_set_errno(dtp, EDT_BADNORMAL)); + } + + (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal); + + return (0); +} + +static int +dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg) +{ + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg); + + if (agg->dtagd_nrecs == 0) + return (DTRACE_AGGWALK_NEXT); + + if (agg->dtagd_varid != id) + return (DTRACE_AGGWALK_NEXT); + + return (DTRACE_AGGWALK_DENORMALIZE); +} + +static int +dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg) +{ + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg); + + if (agg->dtagd_nrecs == 0) + return (DTRACE_AGGWALK_NEXT); + + if (agg->dtagd_varid != id) + return (DTRACE_AGGWALK_NEXT); + + return (DTRACE_AGGWALK_CLEAR); +} + +typedef struct dt_trunc { + dtrace_aggvarid_t dttd_id; + uint64_t dttd_remaining; +} dt_trunc_t; + +static int +dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg) +{ + dt_trunc_t *trunc = arg; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_aggvarid_t id = trunc->dttd_id; + + 
if (agg->dtagd_nrecs == 0) + return (DTRACE_AGGWALK_NEXT); + + if (agg->dtagd_varid != id) + return (DTRACE_AGGWALK_NEXT); + + if (trunc->dttd_remaining == 0) + return (DTRACE_AGGWALK_REMOVE); + + trunc->dttd_remaining--; + return (DTRACE_AGGWALK_NEXT); +} + +static int +dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec) +{ + dt_trunc_t trunc; + caddr_t addr; + int64_t remaining; + int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *); + + /* + * We (should) have two records: the aggregation ID followed by the + * number of aggregation entries after which the aggregation is to be + * truncated. + */ + addr = base + rec->dtrd_offset; + + if (rec->dtrd_size != sizeof (dtrace_aggvarid_t)) + return (dt_set_errno(dtp, EDT_BADTRUNC)); + + /* LINTED - alignment */ + trunc.dttd_id = *((dtrace_aggvarid_t *)addr); + rec++; + + if (rec->dtrd_action != DTRACEACT_LIBACT) + return (dt_set_errno(dtp, EDT_BADTRUNC)); + + if (rec->dtrd_arg != DT_ACT_TRUNC) + return (dt_set_errno(dtp, EDT_BADTRUNC)); + + addr = base + rec->dtrd_offset; + + switch (rec->dtrd_size) { + case sizeof (uint64_t): + /* LINTED - alignment */ + remaining = *((int64_t *)addr); + break; + case sizeof (uint32_t): + /* LINTED - alignment */ + remaining = *((int32_t *)addr); + break; + case sizeof (uint16_t): + /* LINTED - alignment */ + remaining = *((int16_t *)addr); + break; + case sizeof (uint8_t): + remaining = *((int8_t *)addr); + break; + default: + return (dt_set_errno(dtp, EDT_BADNORMAL)); + } + + if (remaining < 0) { + func = dtrace_aggregate_walk_valsorted; + remaining = -remaining; + } else { + func = dtrace_aggregate_walk_valrevsorted; + } + + assert(remaining >= 0); + trunc.dttd_remaining = remaining; + + (void) func(dtp, dt_trunc_agg, &trunc); + + return (0); +} + +static int +dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec, + caddr_t addr, size_t size, const dtrace_aggdata_t *aggdata, + uint64_t normal, dt_print_aggdata_t *pd) +{ + int err, width; + 
dtrace_actkind_t act = rec->dtrd_action; + boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + + static struct { + size_t size; + int width; + int packedwidth; + } *fmt, fmttab[] = { + { sizeof (uint8_t), 3, 3 }, + { sizeof (uint16_t), 5, 5 }, + { sizeof (uint32_t), 8, 8 }, + { sizeof (uint64_t), 16, 16 }, + { 0, -50, 16 } + }; + + if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid) { + dtrace_recdesc_t *r; + + width = 0; + + /* + * To print our quantization header for either an agghist or + * aggpack aggregation, we need to iterate through all of our + * of our records to determine their width. + */ + for (r = rec; !DTRACEACT_ISAGG(r->dtrd_action); r++) { + for (fmt = fmttab; fmt->size && + fmt->size != r->dtrd_size; fmt++) + continue; + + width += fmt->packedwidth + 1; + } + + if (pd->dtpa_agghist) { + if (dt_print_quanthdr(dtp, fp, width) < 0) + return (-1); + } else { + if (dt_print_quanthdr_packed(dtp, fp, + width, aggdata, r->dtrd_action) < 0) + return (-1); + } + + pd->dtpa_agghisthdr = agg->dtagd_varid; + } + + if (pd->dtpa_agghist && DTRACEACT_ISAGG(act)) { + char positives = aggdata->dtada_flags & DTRACE_A_HASPOSITIVES; + char negatives = aggdata->dtada_flags & DTRACE_A_HASNEGATIVES; + int64_t val; + + assert(act == DTRACEAGG_SUM || act == DTRACEAGG_COUNT); + val = (long long)*((uint64_t *)addr); + + if (dt_printf(dtp, fp, " ") < 0) + return (-1); + + return (dt_print_quantline(dtp, fp, val, normal, + aggdata->dtada_total, positives, negatives)); + } + + if (pd->dtpa_aggpack && DTRACEACT_ISAGG(act)) { + switch (act) { + case DTRACEAGG_QUANTIZE: + return (dt_print_quantize_packed(dtp, + fp, addr, size, aggdata)); + case DTRACEAGG_LQUANTIZE: + return (dt_print_lquantize_packed(dtp, + fp, addr, size, aggdata)); + default: + break; + } + } + + switch (act) { + case DTRACEACT_STACK: + return (dt_print_stack(dtp, fp, NULL, addr, + rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg)); + + case DTRACEACT_USTACK: 
+ case DTRACEACT_JSTACK: + return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg)); + + case DTRACEACT_USYM: + case DTRACEACT_UADDR: + return (dt_print_usym(dtp, fp, addr, act)); + + case DTRACEACT_UMOD: + return (dt_print_umod(dtp, fp, NULL, addr)); + + case DTRACEACT_SYM: + return (dt_print_sym(dtp, fp, NULL, addr)); + + case DTRACEACT_MOD: + return (dt_print_mod(dtp, fp, NULL, addr)); + + case DTRACEAGG_QUANTIZE: + return (dt_print_quantize(dtp, fp, addr, size, normal)); + + case DTRACEAGG_LQUANTIZE: + return (dt_print_lquantize(dtp, fp, addr, size, normal)); + + case DTRACEAGG_LLQUANTIZE: + return (dt_print_llquantize(dtp, fp, addr, size, normal)); + + case DTRACEAGG_AVG: + return (dt_print_average(dtp, fp, addr, size, normal)); + + case DTRACEAGG_STDDEV: + return (dt_print_stddev(dtp, fp, addr, size, normal)); + + default: + break; + } + + for (fmt = fmttab; fmt->size && fmt->size != size; fmt++) + continue; + + width = packed ? fmt->packedwidth : fmt->width; + + switch (size) { + case sizeof (uint64_t): + err = dt_printf(dtp, fp, " %*lld", width, + /* LINTED - alignment */ + (long long)*((uint64_t *)addr) / normal); + break; + case sizeof (uint32_t): + /* LINTED - alignment */ + err = dt_printf(dtp, fp, " %*d", width, *((uint32_t *)addr) / + (uint32_t)normal); + break; + case sizeof (uint16_t): + /* LINTED - alignment */ + err = dt_printf(dtp, fp, " %*d", width, *((uint16_t *)addr) / + (uint32_t)normal); + break; + case sizeof (uint8_t): + err = dt_printf(dtp, fp, " %*d", width, *((uint8_t *)addr) / + (uint32_t)normal); + break; + default: + err = dt_print_bytes(dtp, fp, addr, size, width, 0, 0); + break; + } + + return (err); +} + +int +dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg) +{ + int i, aggact = 0; + dt_print_aggdata_t *pd = arg; + const dtrace_aggdata_t *aggdata = aggsdata[0]; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + FILE *fp = pd->dtpa_fp; + dtrace_hdl_t *dtp = pd->dtpa_dtp; + dtrace_recdesc_t *rec; + 
dtrace_actkind_t act; + caddr_t addr; + size_t size; + + pd->dtpa_agghist = (aggdata->dtada_flags & DTRACE_A_TOTAL); + pd->dtpa_aggpack = (aggdata->dtada_flags & DTRACE_A_MINMAXBIN); + + /* + * Iterate over each record description in the key, printing the traced + * data, skipping the first datum (the tuple member created by the + * compiler). + */ + for (i = 1; i < agg->dtagd_nrecs; i++) { + rec = &agg->dtagd_rec[i]; + act = rec->dtrd_action; + addr = aggdata->dtada_data + rec->dtrd_offset; + size = rec->dtrd_size; + + if (DTRACEACT_ISAGG(act)) { + aggact = i; + break; + } + + if (dt_print_datum(dtp, fp, rec, addr, + size, aggdata, 1, pd) < 0) + return (-1); + + if (dt_buffered_flush(dtp, NULL, rec, aggdata, + DTRACE_BUFDATA_AGGKEY) < 0) + return (-1); + } + + assert(aggact != 0); + + for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) { + uint64_t normal; + + aggdata = aggsdata[i]; + agg = aggdata->dtada_desc; + rec = &agg->dtagd_rec[aggact]; + act = rec->dtrd_action; + addr = aggdata->dtada_data + rec->dtrd_offset; + size = rec->dtrd_size; + + assert(DTRACEACT_ISAGG(act)); + normal = aggdata->dtada_normal; + + if (dt_print_datum(dtp, fp, rec, addr, + size, aggdata, normal, pd) < 0) + return (-1); + + if (dt_buffered_flush(dtp, NULL, rec, aggdata, + DTRACE_BUFDATA_AGGVAL) < 0) + return (-1); + + if (!pd->dtpa_allunprint) + agg->dtagd_flags |= DTRACE_AGD_PRINTED; + } + + if (!pd->dtpa_agghist && !pd->dtpa_aggpack) { + if (dt_printf(dtp, fp, "\n") < 0) + return (-1); + } + + if (dt_buffered_flush(dtp, NULL, NULL, aggdata, + DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0) + return (-1); + + return (0); +} + +int +dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg) +{ + dt_print_aggdata_t *pd = arg; + dtrace_aggdesc_t *agg = aggdata->dtada_desc; + dtrace_aggvarid_t aggvarid = pd->dtpa_id; + + if (pd->dtpa_allunprint) { + if (agg->dtagd_flags & DTRACE_AGD_PRINTED) + return (0); + } else { + /* + * If we're not printing all unprinted aggregations, then 
the + * aggregation variable ID denotes a specific aggregation + * variable that we should print -- skip any other aggregations + * that we encounter. + */ + if (agg->dtagd_nrecs == 0) + return (0); + + if (aggvarid != agg->dtagd_varid) + return (0); + } + + return (dt_print_aggs(&aggdata, 1, arg)); +} + +int +dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data, + const char *option, const char *value) +{ + int len, rval; + char *msg; + const char *errstr; + dtrace_setoptdata_t optdata; + + bzero(&optdata, sizeof (optdata)); + (void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval); + + if (dtrace_setopt(dtp, option, value) == 0) { + (void) dtrace_getopt(dtp, option, &optdata.dtsda_newval); + optdata.dtsda_probe = data; + optdata.dtsda_option = option; + optdata.dtsda_handle = dtp; + + if ((rval = dt_handle_setopt(dtp, &optdata)) != 0) + return (rval); + + return (0); + } + + errstr = dtrace_errmsg(dtp, dtrace_errno(dtp)); + len = strlen(option) + strlen(value) + strlen(errstr) + 80; + msg = alloca(len); + + (void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n", + option, value, errstr); + + if ((rval = dt_handle_liberr(dtp, data, msg)) == 0) + return (0); + + return (rval); +} + +static int +dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, + dtrace_bufdesc_t *buf, boolean_t just_one, + dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg) +{ + dtrace_epid_t id; + size_t offs; + int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET); + int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET); + int rval, i, n; + uint64_t tracememsize = 0; + dtrace_probedata_t data; + uint64_t drops; + + bzero(&data, sizeof (data)); + data.dtpda_handle = dtp; + data.dtpda_cpu = cpu; + data.dtpda_flow = dtp->dt_flow; + data.dtpda_indent = dtp->dt_indent; + data.dtpda_prefix = dtp->dt_prefix; + + for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) { + dtrace_eprobedesc_t *epd; + + /* + * We're guaranteed to have 
an ID. + */ + id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs); + + if (id == DTRACE_EPIDNONE) { + /* + * This is filler to assure proper alignment of the + * next record; we simply ignore it. + */ + offs += sizeof (id); + continue; + } + + if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc, + &data.dtpda_pdesc)) != 0) + return (rval); + + epd = data.dtpda_edesc; + data.dtpda_data = buf->dtbd_data + offs; + + if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) { + rval = dt_handle(dtp, &data); + + if (rval == DTRACE_CONSUME_NEXT) + goto nextepid; + + if (rval == DTRACE_CONSUME_ERROR) + return (-1); + } + + if (flow) + (void) dt_flowindent(dtp, &data, dtp->dt_last_epid, + buf, offs); + + rval = (*efunc)(&data, arg); + + if (flow) { + if (data.dtpda_flow == DTRACEFLOW_ENTRY) + data.dtpda_indent += 2; + } + + if (rval == DTRACE_CONSUME_NEXT) + goto nextepid; + + if (rval == DTRACE_CONSUME_ABORT) + return (dt_set_errno(dtp, EDT_DIRABORT)); + + if (rval != DTRACE_CONSUME_THIS) + return (dt_set_errno(dtp, EDT_BADRVAL)); + + for (i = 0; i < epd->dtepd_nrecs; i++) { + caddr_t addr; + dtrace_recdesc_t *rec = &epd->dtepd_rec[i]; + dtrace_actkind_t act = rec->dtrd_action; + + data.dtpda_data = buf->dtbd_data + offs + + rec->dtrd_offset; + addr = data.dtpda_data; + + if (act == DTRACEACT_LIBACT) { + uint64_t arg = rec->dtrd_arg; + dtrace_aggvarid_t id; + + switch (arg) { + case DT_ACT_CLEAR: + /* LINTED - alignment */ + id = *((dtrace_aggvarid_t *)addr); + (void) dtrace_aggregate_walk(dtp, + dt_clear_agg, &id); + continue; + + case DT_ACT_DENORMALIZE: + /* LINTED - alignment */ + id = *((dtrace_aggvarid_t *)addr); + (void) dtrace_aggregate_walk(dtp, + dt_denormalize_agg, &id); + continue; + + case DT_ACT_FTRUNCATE: + if (fp == NULL) + continue; + + (void) fflush(fp); + (void) ftruncate(fileno(fp), 0); + (void) fseeko(fp, 0, SEEK_SET); + continue; + + case DT_ACT_NORMALIZE: + if (i == epd->dtepd_nrecs - 1) + return (dt_set_errno(dtp, + EDT_BADNORMAL)); + + if 
(dt_normalize(dtp, + buf->dtbd_data + offs, rec) != 0) + return (-1); + + i++; + continue; + + case DT_ACT_SETOPT: { + uint64_t *opts = dtp->dt_options; + dtrace_recdesc_t *valrec; + uint32_t valsize; + caddr_t val; + int rv; + + if (i == epd->dtepd_nrecs - 1) { + return (dt_set_errno(dtp, + EDT_BADSETOPT)); + } + + valrec = &epd->dtepd_rec[++i]; + valsize = valrec->dtrd_size; + + if (valrec->dtrd_action != act || + valrec->dtrd_arg != arg) { + return (dt_set_errno(dtp, + EDT_BADSETOPT)); + } + + if (valsize > sizeof (uint64_t)) { + val = buf->dtbd_data + offs + + valrec->dtrd_offset; + } else { + val = "1"; + } + + rv = dt_setopt(dtp, &data, addr, val); + + if (rv != 0) + return (-1); + + flow = (opts[DTRACEOPT_FLOWINDENT] != + DTRACEOPT_UNSET); + quiet = (opts[DTRACEOPT_QUIET] != + DTRACEOPT_UNSET); + + continue; + } + + case DT_ACT_TRUNC: + if (i == epd->dtepd_nrecs - 1) + return (dt_set_errno(dtp, + EDT_BADTRUNC)); + + if (dt_trunc(dtp, + buf->dtbd_data + offs, rec) != 0) + return (-1); + + i++; + continue; + + default: + continue; + } + } + + if (act == DTRACEACT_TRACEMEM_DYNSIZE && + rec->dtrd_size == sizeof (uint64_t)) { + /* LINTED - alignment */ + tracememsize = *((unsigned long long *)addr); + continue; + } + + rval = (*rfunc)(&data, rec, arg); + + if (rval == DTRACE_CONSUME_NEXT) + continue; + + if (rval == DTRACE_CONSUME_ABORT) + return (dt_set_errno(dtp, EDT_DIRABORT)); + + if (rval != DTRACE_CONSUME_THIS) + return (dt_set_errno(dtp, EDT_BADRVAL)); + + if (act == DTRACEACT_STACK) { + int depth = rec->dtrd_arg; + + if (dt_print_stack(dtp, fp, NULL, addr, depth, + rec->dtrd_size / depth) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_USTACK || + act == DTRACEACT_JSTACK) { + if (dt_print_ustack(dtp, fp, NULL, + addr, rec->dtrd_arg) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_SYM) { + if (dt_print_sym(dtp, fp, NULL, addr) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_MOD) { + if (dt_print_mod(dtp, 
fp, NULL, addr) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) { + if (dt_print_usym(dtp, fp, addr, act) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_UMOD) { + if (dt_print_umod(dtp, fp, NULL, addr) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_PRINTM) { + if (dt_print_memory(dtp, fp, addr) < 0) + return (-1); + goto nextrec; + } + + if (act == DTRACEACT_PRINTT) { + if (dt_print_type(dtp, fp, addr) < 0) + return (-1); + goto nextrec; + } + + if (DTRACEACT_ISPRINTFLIKE(act)) { + void *fmtdata; + int (*func)(dtrace_hdl_t *, FILE *, void *, + const dtrace_probedata_t *, + const dtrace_recdesc_t *, uint_t, + const void *buf, size_t); + + if ((fmtdata = dt_format_lookup(dtp, + rec->dtrd_format)) == NULL) + goto nofmt; + + switch (act) { + case DTRACEACT_PRINTF: + func = dtrace_fprintf; + break; + case DTRACEACT_PRINTA: + func = dtrace_fprinta; + break; + case DTRACEACT_SYSTEM: + func = dtrace_system; + break; + case DTRACEACT_FREOPEN: + func = dtrace_freopen; + break; + } + + n = (*func)(dtp, fp, fmtdata, &data, + rec, epd->dtepd_nrecs - i, + (uchar_t *)buf->dtbd_data + offs, + buf->dtbd_size - offs); + + if (n < 0) + return (-1); /* errno is set for us */ + + if (n > 0) + i += n - 1; + goto nextrec; + } + + /* + * If this is a DIF expression, and the record has a + * format set, this indicates we have a CTF type name + * associated with the data and we should try to print + * it out by type. + */ + if (act == DTRACEACT_DIFEXPR) { + const char *strdata = dt_strdata_lookup(dtp, + rec->dtrd_format); + if (strdata != NULL) { + n = dtrace_print(dtp, fp, strdata, + addr, rec->dtrd_size); + + /* + * dtrace_print() will return -1 on + * error, or return the number of bytes + * consumed. It will return 0 if the + * type couldn't be determined, and we + * should fall through to the normal + * trace method. 
+ */ + if (n < 0) + return (-1); + + if (n > 0) + goto nextrec; + } + } + +nofmt: + if (act == DTRACEACT_PRINTA) { + dt_print_aggdata_t pd; + dtrace_aggvarid_t *aggvars; + int j, naggvars = 0; + size_t size = ((epd->dtepd_nrecs - i) * + sizeof (dtrace_aggvarid_t)); + + if ((aggvars = dt_alloc(dtp, size)) == NULL) + return (-1); + + /* + * This might be a printa() with multiple + * aggregation variables. We need to scan + * forward through the records until we find + * a record from a different statement. + */ + for (j = i; j < epd->dtepd_nrecs; j++) { + dtrace_recdesc_t *nrec; + caddr_t naddr; + + nrec = &epd->dtepd_rec[j]; + + if (nrec->dtrd_uarg != rec->dtrd_uarg) + break; + + if (nrec->dtrd_action != act) { + return (dt_set_errno(dtp, + EDT_BADAGG)); + } + + naddr = buf->dtbd_data + offs + + nrec->dtrd_offset; + + aggvars[naggvars++] = + /* LINTED - alignment */ + *((dtrace_aggvarid_t *)naddr); + } + + i = j - 1; + bzero(&pd, sizeof (pd)); + pd.dtpa_dtp = dtp; + pd.dtpa_fp = fp; + + assert(naggvars >= 1); + + if (naggvars == 1) { + pd.dtpa_id = aggvars[0]; + dt_free(dtp, aggvars); + + if (dt_printf(dtp, fp, "\n") < 0 || + dtrace_aggregate_walk_sorted(dtp, + dt_print_agg, &pd) < 0) + return (-1); + goto nextrec; + } + + if (dt_printf(dtp, fp, "\n") < 0 || + dtrace_aggregate_walk_joined(dtp, aggvars, + naggvars, dt_print_aggs, &pd) < 0) { + dt_free(dtp, aggvars); + return (-1); + } + + dt_free(dtp, aggvars); + goto nextrec; + } + + if (act == DTRACEACT_TRACEMEM) { + if (tracememsize == 0 || + tracememsize > rec->dtrd_size) { + tracememsize = rec->dtrd_size; + } + + n = dt_print_bytes(dtp, fp, addr, + tracememsize, -33, quiet, 1); + + tracememsize = 0; + + if (n < 0) + return (-1); + + goto nextrec; + } + + switch (rec->dtrd_size) { + case sizeof (uint64_t): + n = dt_printf(dtp, fp, + quiet ? "%lld" : " %16lld", + /* LINTED - alignment */ + *((unsigned long long *)addr)); + break; + case sizeof (uint32_t): + n = dt_printf(dtp, fp, quiet ? 
"%d" : " %8d", + /* LINTED - alignment */ + *((uint32_t *)addr)); + break; + case sizeof (uint16_t): + n = dt_printf(dtp, fp, quiet ? "%d" : " %5d", + /* LINTED - alignment */ + *((uint16_t *)addr)); + break; + case sizeof (uint8_t): + n = dt_printf(dtp, fp, quiet ? "%d" : " %3d", + *((uint8_t *)addr)); + break; + default: + n = dt_print_bytes(dtp, fp, addr, + rec->dtrd_size, -33, quiet, 0); + break; + } + + if (n < 0) + return (-1); /* errno is set for us */ + +nextrec: + if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0) + return (-1); /* errno is set for us */ + } + + /* + * Call the record callback with a NULL record to indicate + * that we're done processing this EPID. + */ + rval = (*rfunc)(&data, NULL, arg); +nextepid: + offs += epd->dtepd_size; + dtp->dt_last_epid = id; + if (just_one) { + buf->dtbd_oldest = offs; + break; + } + } + + dtp->dt_flow = data.dtpda_flow; + dtp->dt_indent = data.dtpda_indent; + dtp->dt_prefix = data.dtpda_prefix; + + if ((drops = buf->dtbd_drops) == 0) + return (0); + + /* + * Explicitly zero the drops to prevent us from processing them again. + */ + buf->dtbd_drops = 0; + + return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops)); +} + +/* + * Reduce memory usage by shrinking the buffer if it's no more than half full. + * Note, we need to preserve the alignment of the data at dtbd_oldest, which is + * only 4-byte aligned. 
+ */ +static void +dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize) +{ + uint64_t used = buf->dtbd_size - buf->dtbd_oldest; + if (used < cursize / 2) { + int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1); + char *newdata = dt_alloc(dtp, used + misalign); + if (newdata == NULL) + return; + bzero(newdata, misalign); + bcopy(buf->dtbd_data + buf->dtbd_oldest, + newdata + misalign, used); + dt_free(dtp, buf->dtbd_data); + buf->dtbd_oldest = misalign; + buf->dtbd_size = used + misalign; + buf->dtbd_data = newdata; + } +} + +/* + * If the ring buffer has wrapped, the data is not in order. Rearrange it + * so that it is. Note, we need to preserve the alignment of the data at + * dtbd_oldest, which is only 4-byte aligned. + */ +static int +dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf) +{ + int misalign; + char *newdata, *ndp; + + if (buf->dtbd_oldest == 0) + return (0); + + misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1); + newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign); + + if (newdata == NULL) + return (-1); + + assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1))); + + bzero(ndp, misalign); + ndp += misalign; + + bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp, + buf->dtbd_size - buf->dtbd_oldest); + ndp += buf->dtbd_size - buf->dtbd_oldest; + + bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest); + + dt_free(dtp, buf->dtbd_data); + buf->dtbd_oldest = 0; + buf->dtbd_data = newdata; + buf->dtbd_size += misalign; + + return (0); +} + +static void +dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf) +{ + dt_free(dtp, buf->dtbd_data); + dt_free(dtp, buf); +} + +/* + * Returns 0 on success, in which case *cbp will be filled in if we retrieved + * data, or NULL if there is no data for this CPU. + * Returns -1 on failure and sets dt_errno. 
+ */ +static int +dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp) +{ + dtrace_optval_t size; + dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf)); + int error, rval; + + if (buf == NULL) + return (-1); + + (void) dtrace_getopt(dtp, "bufsize", &size); + buf->dtbd_data = dt_alloc(dtp, size); + if (buf->dtbd_data == NULL) { + dt_free(dtp, buf); + return (-1); + } + buf->dtbd_size = size; + buf->dtbd_cpu = cpu; + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { +#else + if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) { +#endif + /* + * If we failed with ENOENT, it may be because the + * CPU was unconfigured -- this is okay. Any other + * error, however, is unexpected. + */ + if (errno == ENOENT) { + *bufp = NULL; + rval = 0; + } else + rval = dt_set_errno(dtp, errno); + + dt_put_buf(dtp, buf); + return (rval); + } + + error = dt_unring_buf(dtp, buf); + if (error != 0) { + dt_put_buf(dtp, buf); + return (error); + } + dt_realloc_buf(dtp, buf, size); + + *bufp = buf; + return (0); +} + +typedef struct dt_begin { + dtrace_consume_probe_f *dtbgn_probefunc; + dtrace_consume_rec_f *dtbgn_recfunc; + void *dtbgn_arg; + dtrace_handle_err_f *dtbgn_errhdlr; + void *dtbgn_errarg; + int dtbgn_beginonly; +} dt_begin_t; + +static int +dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg) +{ + dt_begin_t *begin = arg; + dtrace_probedesc_t *pd = data->dtpda_pdesc; + + int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0); + int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0); + + if (begin->dtbgn_beginonly) { + if (!(r1 && r2)) + return (DTRACE_CONSUME_NEXT); + } else { + if (r1 && r2) + return (DTRACE_CONSUME_NEXT); + } + + /* + * We have a record that we're interested in. Now call the underlying + * probe function... 
+ */ + return (begin->dtbgn_probefunc(data, begin->dtbgn_arg)); +} + +static int +dt_consume_begin_record(const dtrace_probedata_t *data, + const dtrace_recdesc_t *rec, void *arg) +{ + dt_begin_t *begin = arg; + + return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg)); +} + +static int +dt_consume_begin_error(const dtrace_errdata_t *data, void *arg) +{ + dt_begin_t *begin = (dt_begin_t *)arg; + dtrace_probedesc_t *pd = data->dteda_pdesc; + + int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0); + int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0); + + if (begin->dtbgn_beginonly) { + if (!(r1 && r2)) + return (DTRACE_HANDLE_OK); + } else { + if (r1 && r2) + return (DTRACE_HANDLE_OK); + } + + return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg)); +} + +static int +dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, + dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg) +{ + /* + * There's this idea that the BEGIN probe should be processed before + * everything else, and that the END probe should be processed after + * anything else. In the common case, this is pretty easy to deal + * with. However, a situation may arise where the BEGIN enabling and + * END enabling are on the same CPU, and some enabling in the middle + * occurred on a different CPU. To deal with this (blech!) we need to + * consume the BEGIN buffer up until the end of the BEGIN probe, and + * then set it aside. We will then process every other CPU, and then + * we'll return to the BEGIN CPU and process the rest of the data + * (which will inevitably include the END probe, if any). Making this + * even more complicated (!) is the library's ERROR enabling. Because + * this enabling is processed before we even get into the consume call + * back, any ERROR firing would result in the library's ERROR enabling + * being processed twice -- once in our first pass (for BEGIN probes), + * and again in our second pass (for everything but BEGIN probes). 
To + * deal with this, we interpose on the ERROR handler to assure that we + * only process ERROR enablings induced by BEGIN enablings in the + * first pass, and that we only process ERROR enablings _not_ induced + * by BEGIN enablings in the second pass. + */ + + dt_begin_t begin; + processorid_t cpu = dtp->dt_beganon; + int rval, i; + static int max_ncpus; + dtrace_bufdesc_t *buf; + + dtp->dt_beganon = -1; + + if (dt_get_buf(dtp, cpu, &buf) != 0) + return (-1); + if (buf == NULL) + return (0); + + if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) { + /* + * This is the simple case. We're either not stopped, or if + * we are, we actually processed any END probes on another + * CPU. We can simply consume this buffer and return. + */ + rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE, + pf, rf, arg); + dt_put_buf(dtp, buf); + return (rval); + } + + begin.dtbgn_probefunc = pf; + begin.dtbgn_recfunc = rf; + begin.dtbgn_arg = arg; + begin.dtbgn_beginonly = 1; + + /* + * We need to interpose on the ERROR handler to be sure that we + * only process ERRORs induced by BEGIN. 
+ */ + begin.dtbgn_errhdlr = dtp->dt_errhdlr; + begin.dtbgn_errarg = dtp->dt_errarg; + dtp->dt_errhdlr = dt_consume_begin_error; + dtp->dt_errarg = &begin; + + rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE, + dt_consume_begin_probe, dt_consume_begin_record, &begin); + + dtp->dt_errhdlr = begin.dtbgn_errhdlr; + dtp->dt_errarg = begin.dtbgn_errarg; + + if (rval != 0) { + dt_put_buf(dtp, buf); + return (rval); + } + + if (max_ncpus == 0) + max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; + + for (i = 0; i < max_ncpus; i++) { + dtrace_bufdesc_t *nbuf; + if (i == cpu) + continue; + + if (dt_get_buf(dtp, i, &nbuf) != 0) { + dt_put_buf(dtp, buf); + return (-1); + } + if (nbuf == NULL) + continue; + + rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE, + pf, rf, arg); + dt_put_buf(dtp, nbuf); + if (rval != 0) { + dt_put_buf(dtp, buf); + return (rval); + } + } + + /* + * Okay -- we're done with the other buffers. Now we want to + * reconsume the first buffer -- but this time we're looking for + * everything _but_ BEGIN. And of course, in order to only consume + * those ERRORs _not_ associated with BEGIN, we need to reinstall our + * ERROR interposition function... 
+ */ + begin.dtbgn_beginonly = 0; + + assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr); + assert(begin.dtbgn_errarg == dtp->dt_errarg); + dtp->dt_errhdlr = dt_consume_begin_error; + dtp->dt_errarg = &begin; + + rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE, + dt_consume_begin_probe, dt_consume_begin_record, &begin); + + dtp->dt_errhdlr = begin.dtbgn_errhdlr; + dtp->dt_errarg = begin.dtbgn_errarg; + + return (rval); +} + +/* ARGSUSED */ +static uint64_t +dt_buf_oldest(void *elem, void *arg) +{ + dtrace_bufdesc_t *buf = elem; + size_t offs = buf->dtbd_oldest; + + while (offs < buf->dtbd_size) { + dtrace_rechdr_t *dtrh = + /* LINTED - alignment */ + (dtrace_rechdr_t *)(buf->dtbd_data + offs); + if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { + offs += sizeof (dtrace_epid_t); + } else { + return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh)); + } + } + + /* There are no records left; use the time the buffer was retrieved. */ + return (buf->dtbd_timestamp); +} + +int +dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, + dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg) +{ + dtrace_optval_t size; + static int max_ncpus; + int i, rval; + dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE]; + hrtime_t now = gethrtime(); + + if (dtp->dt_lastswitch != 0) { + if (now - dtp->dt_lastswitch < interval) + return (0); + + dtp->dt_lastswitch += interval; + } else { + dtp->dt_lastswitch = now; + } + + if (!dtp->dt_active) + return (dt_set_errno(dtp, EINVAL)); + + if (max_ncpus == 0) + max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; + + if (pf == NULL) + pf = (dtrace_consume_probe_f *)dt_nullprobe; + + if (rf == NULL) + rf = (dtrace_consume_rec_f *)dt_nullrec; + + if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) { + /* + * The output will not be in the order it was traced. Rather, + * we will consume all of the data from each CPU's buffer in + * turn. 
We apply special handling for the records from BEGIN + * and END probes so that they are consumed first and last, + * respectively. + * + * If we have just begun, we want to first process the CPU that + * executed the BEGIN probe (if any). + */ + if (dtp->dt_active && dtp->dt_beganon != -1 && + (rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0) + return (rval); + + for (i = 0; i < max_ncpus; i++) { + dtrace_bufdesc_t *buf; + + /* + * If we have stopped, we want to process the CPU on + * which the END probe was processed only _after_ we + * have processed everything else. + */ + if (dtp->dt_stopped && (i == dtp->dt_endedon)) + continue; + + if (dt_get_buf(dtp, i, &buf) != 0) + return (-1); + if (buf == NULL) + continue; + + dtp->dt_flow = 0; + dtp->dt_indent = 0; + dtp->dt_prefix = NULL; + rval = dt_consume_cpu(dtp, fp, i, + buf, B_FALSE, pf, rf, arg); + dt_put_buf(dtp, buf); + if (rval != 0) + return (rval); + } + if (dtp->dt_stopped) { + dtrace_bufdesc_t *buf; + + if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0) + return (-1); + if (buf == NULL) + return (0); + + rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon, + buf, B_FALSE, pf, rf, arg); + dt_put_buf(dtp, buf); + return (rval); + } + } else { + /* + * The output will be in the order it was traced (or for + * speculations, when it was committed). We retrieve a buffer + * from each CPU and put it into a priority queue, which sorts + * based on the first entry in the buffer. This is sufficient + * because entries within a buffer are already sorted. + * + * We then consume records one at a time, always consuming the + * oldest record, as determined by the priority queue. When + * we reach the end of the time covered by these buffers, + * we need to stop and retrieve more records on the next pass. + * The kernel tells us the time covered by each buffer, in + * dtbd_timestamp. 
The first buffer's timestamp tells us the + * time covered by all buffers, as subsequently retrieved + * buffers will cover to a more recent time. + */ + + uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t)); + uint64_t first_timestamp = 0; + uint_t cookie = 0; + dtrace_bufdesc_t *buf; + + bzero(drops, max_ncpus * sizeof (uint64_t)); + + if (dtp->dt_bufq == NULL) { + dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2, + dt_buf_oldest, NULL); + if (dtp->dt_bufq == NULL) /* ENOMEM */ + return (-1); + } + + /* Retrieve data from each CPU. */ + (void) dtrace_getopt(dtp, "bufsize", &size); + for (i = 0; i < max_ncpus; i++) { + dtrace_bufdesc_t *buf; + + if (dt_get_buf(dtp, i, &buf) != 0) + return (-1); + if (buf != NULL) { + if (first_timestamp == 0) + first_timestamp = buf->dtbd_timestamp; + assert(buf->dtbd_timestamp >= first_timestamp); + + dt_pq_insert(dtp->dt_bufq, buf); + drops[i] = buf->dtbd_drops; + buf->dtbd_drops = 0; + } + } + + /* Consume records. */ + for (;;) { + dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq); + uint64_t timestamp; + + if (buf == NULL) + break; + + timestamp = dt_buf_oldest(buf, dtp); + assert(timestamp >= dtp->dt_last_timestamp); + dtp->dt_last_timestamp = timestamp; + + if (timestamp == buf->dtbd_timestamp) { + /* + * We've reached the end of the time covered + * by this buffer. If this is the oldest + * buffer, we must do another pass + * to retrieve more data. + */ + dt_put_buf(dtp, buf); + if (timestamp == first_timestamp && + !dtp->dt_stopped) + break; + continue; + } + + if ((rval = dt_consume_cpu(dtp, fp, + buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0) + return (rval); + dt_pq_insert(dtp->dt_bufq, buf); + } + + /* Consume drops. */ + for (i = 0; i < max_ncpus; i++) { + if (drops[i] != 0) { + int error = dt_handle_cpudrop(dtp, i, + DTRACEDROP_PRINCIPAL, drops[i]); + if (error != 0) + return (error); + } + } + + /* + * Reduce memory usage by re-allocating smaller buffers + * for the "remnants". 
+ */ + while (buf = dt_pq_walk(dtp->dt_bufq, &cookie)) + dt_realloc_buf(dtp, buf, buf->dtbd_size); + } + + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_decl.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_decl.c new file mode 100644 index 0000000..d717d56 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_decl.c @@ -0,0 +1,1129 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <strings.h> +#include <stdlib.h> +#include <limits.h> +#include <alloca.h> +#include <assert.h> + +#include <dt_decl.h> +#include <dt_parser.h> +#include <dt_module.h> +#include <dt_impl.h> + +static dt_decl_t * +dt_decl_check(dt_decl_t *ddp) +{ + if (ddp->dd_kind == CTF_K_UNKNOWN) + return (ddp); /* nothing to check if the type is not yet set */ + + if (ddp->dd_name != NULL && strcmp(ddp->dd_name, "char") == 0 && + (ddp->dd_attr & (DT_DA_SHORT | DT_DA_LONG | DT_DA_LONGLONG))) { + xyerror(D_DECL_CHARATTR, "invalid type declaration: short and " + "long may not be used with char type\n"); + } + + if (ddp->dd_name != NULL && strcmp(ddp->dd_name, "void") == 0 && + (ddp->dd_attr & (DT_DA_SHORT | DT_DA_LONG | DT_DA_LONGLONG | + (DT_DA_SIGNED | DT_DA_UNSIGNED)))) { + xyerror(D_DECL_VOIDATTR, "invalid type declaration: attributes " + "may not be used with void type\n"); + } + + if (ddp->dd_kind != CTF_K_INTEGER && + (ddp->dd_attr & (DT_DA_SIGNED | DT_DA_UNSIGNED))) { + xyerror(D_DECL_SIGNINT, "invalid type declaration: signed and " + "unsigned may only be used with integer type\n"); + } + + if (ddp->dd_kind != CTF_K_INTEGER && ddp->dd_kind != CTF_K_FLOAT && + (ddp->dd_attr & (DT_DA_LONG | DT_DA_LONGLONG))) { + xyerror(D_DECL_LONGINT, "invalid type declaration: long and " + "long long may only be used with integer or " + "floating-point type\n"); + } + + return (ddp); +} + +dt_decl_t * +dt_decl_alloc(ushort_t kind, char *name) +{ + dt_decl_t *ddp = malloc(sizeof (dt_decl_t)); + + if (ddp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + ddp->dd_kind = kind; + ddp->dd_attr = 0; + ddp->dd_ctfp = NULL; + ddp->dd_type = CTF_ERR; + ddp->dd_name = name; + ddp->dd_node = NULL; + ddp->dd_next = NULL; + + return (ddp); +} + +void +dt_decl_free(dt_decl_t *ddp) +{ + dt_decl_t *ndp; + + for (; ddp != NULL; ddp = ndp) { + ndp = ddp->dd_next; + free(ddp->dd_name); + dt_node_list_free(&ddp->dd_node); + free(ddp); + } +} + +void 
+dt_decl_reset(void) +{ + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_decl_t *ddp = dsp->ds_decl; + + while (ddp->dd_next != NULL) { + dsp->ds_decl = ddp->dd_next; + ddp->dd_next = NULL; + dt_decl_free(ddp); + ddp = dsp->ds_decl; + } +} + +dt_decl_t * +dt_decl_push(dt_decl_t *ddp) +{ + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_decl_t *top = dsp->ds_decl; + + if (top != NULL && + top->dd_kind == CTF_K_UNKNOWN && top->dd_name == NULL) { + top->dd_kind = CTF_K_INTEGER; + (void) dt_decl_check(top); + } + + assert(ddp->dd_next == NULL); + ddp->dd_next = top; + dsp->ds_decl = ddp; + + return (ddp); +} + +dt_decl_t * +dt_decl_pop(void) +{ + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_decl_t *ddp = dt_decl_top(); + + dsp->ds_decl = NULL; + free(dsp->ds_ident); + dsp->ds_ident = NULL; + dsp->ds_ctfp = NULL; + dsp->ds_type = CTF_ERR; + dsp->ds_class = DT_DC_DEFAULT; + dsp->ds_enumval = -1; + + return (ddp); +} + +dt_decl_t * +dt_decl_pop_param(char **idp) +{ + dt_scope_t *dsp = &yypcb->pcb_dstack; + + if (dsp->ds_class != DT_DC_DEFAULT && dsp->ds_class != DT_DC_REGISTER) { + xyerror(D_DECL_PARMCLASS, "inappropriate storage class " + "for function or associative array parameter\n"); + } + + if (idp != NULL && dt_decl_top() != NULL) { + *idp = dsp->ds_ident; + dsp->ds_ident = NULL; + } + + return (dt_decl_pop()); +} + +dt_decl_t * +dt_decl_top(void) +{ + dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl; + + if (ddp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NODECL); + + if (ddp->dd_kind == CTF_K_UNKNOWN && ddp->dd_name == NULL) { + ddp->dd_kind = CTF_K_INTEGER; + (void) dt_decl_check(ddp); + } + + return (ddp); +} + +dt_decl_t * +dt_decl_ident(char *name) +{ + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_decl_t *ddp = dsp->ds_decl; + + if (dsp->ds_ident != NULL) { + free(name); + xyerror(D_DECL_IDENT, "old-style declaration or " + "incorrect type specified\n"); + } + + dsp->ds_ident = name; + + if (ddp == NULL) + ddp = dt_decl_push(dt_decl_alloc(CTF_K_UNKNOWN, NULL)); + + return (ddp); 
+} + +void +dt_decl_class(dt_dclass_t class) +{ + dt_scope_t *dsp = &yypcb->pcb_dstack; + + if (dsp->ds_class != DT_DC_DEFAULT) { + xyerror(D_DECL_CLASS, "only one storage class allowed " + "in a declaration\n"); + } + + dsp->ds_class = class; +} + +/* + * Set the kind and name of the current declaration. If none is allocated, + * make a new decl and push it on to the top of our stack. If the name or kind + * is already set for the current decl, then we need to fail this declaration. + * This can occur because too many types were given (e.g. "int int"), etc. + */ +dt_decl_t * +dt_decl_spec(ushort_t kind, char *name) +{ + dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl; + + if (ddp == NULL) + return (dt_decl_push(dt_decl_alloc(kind, name))); + + /* + * If we already have a type name specified and we see another type + * name, this is an error if the declaration is a typedef. If the + * declaration is not a typedef, then the user may be trying to declare + * a variable whose name has been returned by lex as a TNAME token: + * call dt_decl_ident() as if the grammar's IDENT rule was matched. 
+ */ + if (ddp->dd_name != NULL && kind == CTF_K_TYPEDEF) { + if (yypcb->pcb_dstack.ds_class != DT_DC_TYPEDEF) + return (dt_decl_ident(name)); + xyerror(D_DECL_IDRED, "identifier redeclared: %s\n", name); + } + + if (ddp->dd_name != NULL || ddp->dd_kind != CTF_K_UNKNOWN) + xyerror(D_DECL_COMBO, "invalid type combination\n"); + + ddp->dd_kind = kind; + ddp->dd_name = name; + + return (dt_decl_check(ddp)); +} + +dt_decl_t * +dt_decl_attr(ushort_t attr) +{ + dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl; + + if (ddp == NULL) { + ddp = dt_decl_push(dt_decl_alloc(CTF_K_UNKNOWN, NULL)); + ddp->dd_attr = attr; + return (ddp); + } + + if (attr == DT_DA_LONG && (ddp->dd_attr & DT_DA_LONG)) { + ddp->dd_attr &= ~DT_DA_LONG; + attr = DT_DA_LONGLONG; + } + + ddp->dd_attr |= attr; + return (dt_decl_check(ddp)); +} + +/* + * Examine the list of formal parameters 'flist' and determine if the formal + * name fnp->dn_string is defined in this list (B_TRUE) or not (B_FALSE). + * If 'fnp' is in 'flist', do not search beyond 'fnp' itself in 'flist'. + */ +static int +dt_decl_protoform(dt_node_t *fnp, dt_node_t *flist) +{ + dt_node_t *dnp; + + for (dnp = flist; dnp != fnp && dnp != NULL; dnp = dnp->dn_list) { + if (dnp->dn_string != NULL && + strcmp(dnp->dn_string, fnp->dn_string) == 0) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Common code for parsing array, function, and probe definition prototypes. + * The prototype node list is specified as 'plist'. The formal prototype + * against which to compare the prototype is specified as 'flist'. If plist + * and flist are the same, we require that named parameters are unique. If + * plist and flist are different, we require that named parameters in plist + * match a name that is present in flist. 
+ */ +int +dt_decl_prototype(dt_node_t *plist, + dt_node_t *flist, const char *kind, uint_t flags) +{ + char n[DT_TYPE_NAMELEN]; + int is_void, v = 0, i = 1; + int form = plist != flist; + dt_node_t *dnp; + + for (dnp = plist; dnp != NULL; dnp = dnp->dn_list, i++) { + + if (dnp->dn_type == CTF_ERR && !(flags & DT_DP_VARARGS)) { + dnerror(dnp, D_DECL_PROTO_VARARGS, "%s prototype may " + "not use a variable-length argument list\n", kind); + } + + if (dt_node_is_dynamic(dnp) && !(flags & DT_DP_DYNAMIC)) { + dnerror(dnp, D_DECL_PROTO_TYPE, "%s prototype may not " + "use parameter of type %s: %s, parameter #%d\n", + kind, dt_node_type_name(dnp, n, sizeof (n)), + dnp->dn_string ? dnp->dn_string : "(anonymous)", i); + } + + is_void = dt_node_is_void(dnp); + v += is_void; + + if (is_void && !(flags & DT_DP_VOID)) { + dnerror(dnp, D_DECL_PROTO_TYPE, "%s prototype may not " + "use parameter of type %s: %s, parameter #%d\n", + kind, dt_node_type_name(dnp, n, sizeof (n)), + dnp->dn_string ? dnp->dn_string : "(anonymous)", i); + } + + if (is_void && dnp->dn_string != NULL) { + dnerror(dnp, D_DECL_PROTO_NAME, "void parameter may " + "not have a name: %s\n", dnp->dn_string); + } + + if (dnp->dn_string != NULL && + dt_decl_protoform(dnp, flist) != form) { + dnerror(dnp, D_DECL_PROTO_FORM, "parameter is " + "%s declared in %s prototype: %s, parameter #%d\n", + form ? "not" : "already", kind, dnp->dn_string, i); + } + + if (dnp->dn_string == NULL && + !is_void && !(flags & DT_DP_ANON)) { + dnerror(dnp, D_DECL_PROTO_NAME, "parameter declaration " + "requires a name: parameter #%d\n", i); + } + } + + if (v != 0 && plist->dn_list != NULL) + xyerror(D_DECL_PROTO_VOID, "void must be sole parameter\n"); + + return (v ? 
0 : i - 1); /* return zero if sole parameter is 'void' */ +} + +dt_decl_t * +dt_decl_array(dt_node_t *dnp) +{ + dt_decl_t *ddp = dt_decl_push(dt_decl_alloc(CTF_K_ARRAY, NULL)); + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_decl_t *ndp = ddp; + + /* + * After pushing the array on to the decl stack, scan ahead for multi- + * dimensional array declarations and push the current decl to the + * bottom to match the resulting CTF type tree and data layout. Refer + * to the comments in dt_decl_type() and ISO C 6.5.2.1 for more info. + */ + while (ndp->dd_next != NULL && ndp->dd_next->dd_kind == CTF_K_ARRAY) + ndp = ndp->dd_next; /* skip to bottom-most array declaration */ + + if (ndp != ddp) { + if (dnp != NULL && dnp->dn_kind == DT_NODE_TYPE) { + xyerror(D_DECL_DYNOBJ, + "cannot declare array of associative arrays\n"); + } + dsp->ds_decl = ddp->dd_next; + ddp->dd_next = ndp->dd_next; + ndp->dd_next = ddp; + } + + if (ddp->dd_next->dd_name != NULL && + strcmp(ddp->dd_next->dd_name, "void") == 0) + xyerror(D_DECL_VOIDOBJ, "cannot declare array of void\n"); + + if (dnp != NULL && dnp->dn_kind != DT_NODE_TYPE) { + dnp = ddp->dd_node = dt_node_cook(dnp, DT_IDFLG_REF); + + if (dt_node_is_posconst(dnp) == 0) { + xyerror(D_DECL_ARRSUB, "positive integral constant " + "expression or tuple signature expected as " + "array declaration subscript\n"); + } + + if (dnp->dn_value > UINT_MAX) + xyerror(D_DECL_ARRBIG, "array dimension too big\n"); + + } else if (dnp != NULL) { + ddp->dd_node = dnp; + (void) dt_decl_prototype(dnp, dnp, "array", DT_DP_ANON); + } + + return (ddp); +} + +/* + * When a function is declared, we need to fudge the decl stack a bit if the + * declaration uses the function pointer (*)() syntax. In this case, the + * dt_decl_func() call occurs *after* the dt_decl_ptr() call, even though the + * resulting type is "pointer to function". To make the pointer land on top, + * we check to see if 'pdp' is non-NULL and a pointer. 
If it is, we search + * backward for a decl tagged with DT_DA_PAREN, and if one is found, the func + * decl is inserted behind this node in the decl list instead of at the top. + * In all cases, the func decl's dd_next pointer is set to the decl chain + * for the function's return type and the function parameter list is discarded. + */ +dt_decl_t * +dt_decl_func(dt_decl_t *pdp, dt_node_t *dnp) +{ + dt_decl_t *ddp = dt_decl_alloc(CTF_K_FUNCTION, NULL); + + ddp->dd_node = dnp; + + (void) dt_decl_prototype(dnp, dnp, "function", + DT_DP_VARARGS | DT_DP_VOID | DT_DP_ANON); + + if (pdp == NULL || pdp->dd_kind != CTF_K_POINTER) + return (dt_decl_push(ddp)); + + while (pdp->dd_next != NULL && !(pdp->dd_next->dd_attr & DT_DA_PAREN)) + pdp = pdp->dd_next; + + if (pdp->dd_next == NULL) + return (dt_decl_push(ddp)); + + ddp->dd_next = pdp->dd_next; + pdp->dd_next = ddp; + + return (pdp); +} + +dt_decl_t * +dt_decl_ptr(void) +{ + return (dt_decl_push(dt_decl_alloc(CTF_K_POINTER, NULL))); +} + +dt_decl_t * +dt_decl_sou(uint_t kind, char *name) +{ + dt_decl_t *ddp = dt_decl_spec(kind, name); + char n[DT_TYPE_NAMELEN]; + ctf_file_t *ctfp; + ctf_id_t type; + uint_t flag; + + if (yypcb->pcb_idepth != 0) + ctfp = yypcb->pcb_hdl->dt_cdefs->dm_ctfp; + else + ctfp = yypcb->pcb_hdl->dt_ddefs->dm_ctfp; + + if (yypcb->pcb_dstack.ds_next != NULL) + flag = CTF_ADD_NONROOT; + else + flag = CTF_ADD_ROOT; + + (void) snprintf(n, sizeof (n), "%s %s", + kind == CTF_K_STRUCT ? "struct" : "union", + name == NULL ? 
"(anon)" : name); + + if (name != NULL && (type = ctf_lookup_by_name(ctfp, n)) != CTF_ERR && + ctf_type_kind(ctfp, type) != CTF_K_FORWARD) + xyerror(D_DECL_TYPERED, "type redeclared: %s\n", n); + + if (kind == CTF_K_STRUCT) + type = ctf_add_struct(ctfp, flag, name); + else + type = ctf_add_union(ctfp, flag, name); + + if (type == CTF_ERR || ctf_update(ctfp) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to define %s: %s\n", + n, ctf_errmsg(ctf_errno(ctfp))); + } + + ddp->dd_ctfp = ctfp; + ddp->dd_type = type; + + dt_scope_push(ctfp, type); + return (ddp); +} + +void +dt_decl_member(dt_node_t *dnp) +{ + dt_scope_t *dsp = yypcb->pcb_dstack.ds_next; + dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl; + char *ident = yypcb->pcb_dstack.ds_ident; + + const char *idname = ident ? ident : "(anon)"; + char n[DT_TYPE_NAMELEN]; + + dtrace_typeinfo_t dtt; + ctf_encoding_t cte; + ctf_id_t base; + uint_t kind; + ssize_t size; + + if (dsp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOSCOPE); + + if (ddp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NODECL); + + if (dnp == NULL && ident == NULL) + xyerror(D_DECL_MNAME, "member declaration requires a name\n"); + + if (ddp->dd_kind == CTF_K_UNKNOWN && ddp->dd_name == NULL) { + ddp->dd_kind = CTF_K_INTEGER; + (void) dt_decl_check(ddp); + } + + if (dt_decl_type(ddp, &dtt) != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + + if (ident != NULL && strchr(ident, '`') != NULL) { + xyerror(D_DECL_SCOPE, "D scoping operator may not be used " + "in a member name (%s)\n", ident); + } + + if (dtt.dtt_ctfp == DT_DYN_CTFP(yypcb->pcb_hdl) && + dtt.dtt_type == DT_DYN_TYPE(yypcb->pcb_hdl)) { + xyerror(D_DECL_DYNOBJ, + "cannot have dynamic member: %s\n", ident); + } + + base = ctf_type_resolve(dtt.dtt_ctfp, dtt.dtt_type); + kind = ctf_type_kind(dtt.dtt_ctfp, base); + size = ctf_type_size(dtt.dtt_ctfp, base); + + if (kind == CTF_K_FORWARD || ((kind == CTF_K_STRUCT || + kind == CTF_K_UNION) && size == 0)) { + xyerror(D_DECL_INCOMPLETE, "incomplete struct/union/enum 
%s: " + "%s\n", dt_type_name(dtt.dtt_ctfp, dtt.dtt_type, + n, sizeof (n)), ident); + } + + if (size == 0) + xyerror(D_DECL_VOIDOBJ, "cannot have void member: %s\n", ident); + + /* + * If a bit-field qualifier was part of the member declaration, create + * a new integer type of the same name and attributes as the base type + * and size equal to the specified number of bits. We reset 'dtt' to + * refer to this new bit-field type and continue on to add the member. + */ + if (dnp != NULL) { + dnp = dt_node_cook(dnp, DT_IDFLG_REF); + + /* + * A bit-field member with no declarator is permitted to have + * size zero and indicates that no more fields are to be packed + * into the current storage unit. We ignore these directives + * as the underlying ctf code currently does so for all fields. + */ + if (ident == NULL && dnp->dn_kind == DT_NODE_INT && + dnp->dn_value == 0) { + dt_node_free(dnp); + goto done; + } + + if (dt_node_is_posconst(dnp) == 0) { + xyerror(D_DECL_BFCONST, "positive integral constant " + "expression expected as bit-field size\n"); + } + + if (ctf_type_kind(dtt.dtt_ctfp, base) != CTF_K_INTEGER || + ctf_type_encoding(dtt.dtt_ctfp, base, &cte) == CTF_ERR || + IS_VOID(cte)) { + xyerror(D_DECL_BFTYPE, "invalid type for " + "bit-field: %s\n", idname); + } + + if (dnp->dn_value > cte.cte_bits) { + xyerror(D_DECL_BFSIZE, "bit-field too big " + "for type: %s\n", idname); + } + + cte.cte_offset = 0; + cte.cte_bits = (uint_t)dnp->dn_value; + + dtt.dtt_type = ctf_add_integer(dsp->ds_ctfp, + CTF_ADD_NONROOT, ctf_type_name(dtt.dtt_ctfp, + dtt.dtt_type, n, sizeof (n)), &cte); + + if (dtt.dtt_type == CTF_ERR || + ctf_update(dsp->ds_ctfp) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to create type for " + "member '%s': %s\n", idname, + ctf_errmsg(ctf_errno(dsp->ds_ctfp))); + } + + dtt.dtt_ctfp = dsp->ds_ctfp; + dt_node_free(dnp); + } + + /* + * If the member type is not defined in the same CTF container as the + * one associated with the current scope (i.e. 
the container for the + * struct or union itself) or its parent, copy the member type into + * this container and reset dtt to refer to the copied type. + */ + if (dtt.dtt_ctfp != dsp->ds_ctfp && + dtt.dtt_ctfp != ctf_parent_file(dsp->ds_ctfp)) { + + dtt.dtt_type = ctf_add_type(dsp->ds_ctfp, + dtt.dtt_ctfp, dtt.dtt_type); + dtt.dtt_ctfp = dsp->ds_ctfp; + + if (dtt.dtt_type == CTF_ERR || + ctf_update(dtt.dtt_ctfp) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to copy type of '%s': %s\n", + idname, ctf_errmsg(ctf_errno(dtt.dtt_ctfp))); + } + } + + if (ctf_add_member(dsp->ds_ctfp, dsp->ds_type, + ident, dtt.dtt_type) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to define member '%s': %s\n", + idname, ctf_errmsg(ctf_errno(dsp->ds_ctfp))); + } + +done: + free(ident); + yypcb->pcb_dstack.ds_ident = NULL; + dt_decl_reset(); +} + +/*ARGSUSED*/ +static int +dt_decl_hasmembers(const char *name, int value, void *private) +{ + return (1); /* abort search and return true if a member exists */ +} + +dt_decl_t * +dt_decl_enum(char *name) +{ + dt_decl_t *ddp = dt_decl_spec(CTF_K_ENUM, name); + char n[DT_TYPE_NAMELEN]; + ctf_file_t *ctfp; + ctf_id_t type; + uint_t flag; + + if (yypcb->pcb_idepth != 0) + ctfp = yypcb->pcb_hdl->dt_cdefs->dm_ctfp; + else + ctfp = yypcb->pcb_hdl->dt_ddefs->dm_ctfp; + + if (yypcb->pcb_dstack.ds_next != NULL) + flag = CTF_ADD_NONROOT; + else + flag = CTF_ADD_ROOT; + + (void) snprintf(n, sizeof (n), "enum %s", name ? 
name : "(anon)"); + + if (name != NULL && (type = ctf_lookup_by_name(ctfp, n)) != CTF_ERR) { + if (ctf_enum_iter(ctfp, type, dt_decl_hasmembers, NULL)) + xyerror(D_DECL_TYPERED, "type redeclared: %s\n", n); + } else if ((type = ctf_add_enum(ctfp, flag, name)) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to define %s: %s\n", + n, ctf_errmsg(ctf_errno(ctfp))); + } + + ddp->dd_ctfp = ctfp; + ddp->dd_type = type; + + dt_scope_push(ctfp, type); + return (ddp); +} + +void +dt_decl_enumerator(char *s, dt_node_t *dnp) +{ + dt_scope_t *dsp = yypcb->pcb_dstack.ds_next; + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + dt_idnode_t *inp; + dt_ident_t *idp; + char *name; + int value; + + name = alloca(strlen(s) + 1); + (void) strcpy(name, s); + free(s); + + if (dsp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOSCOPE); + + assert(dsp->ds_decl->dd_kind == CTF_K_ENUM); + value = dsp->ds_enumval + 1; /* default is previous value plus one */ + + if (strchr(name, '`') != NULL) { + xyerror(D_DECL_SCOPE, "D scoping operator may not be used in " + "an enumerator name (%s)\n", name); + } + + /* + * If the enumerator is being assigned a value, cook and check the node + * and then free it after we get the value. We also permit references + * to identifiers which are previously defined enumerators in the type. 
+ */ + if (dnp != NULL) { + if (dnp->dn_kind != DT_NODE_IDENT || ctf_enum_value( + dsp->ds_ctfp, dsp->ds_type, dnp->dn_string, &value) != 0) { + dnp = dt_node_cook(dnp, DT_IDFLG_REF); + + if (dnp->dn_kind != DT_NODE_INT) { + xyerror(D_DECL_ENCONST, "enumerator '%s' must " + "be assigned to an integral constant " + "expression\n", name); + } + + if ((intmax_t)dnp->dn_value > INT_MAX || + (intmax_t)dnp->dn_value < INT_MIN) { + xyerror(D_DECL_ENOFLOW, "enumerator '%s' value " + "overflows INT_MAX (%d)\n", name, INT_MAX); + } + + value = (int)dnp->dn_value; + } + dt_node_free(dnp); + } + + if (ctf_add_enumerator(dsp->ds_ctfp, dsp->ds_type, + name, value) == CTF_ERR || ctf_update(dsp->ds_ctfp) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to define enumerator '%s': %s\n", + name, ctf_errmsg(ctf_errno(dsp->ds_ctfp))); + } + + dsp->ds_enumval = value; /* save most recent value */ + + /* + * If the enumerator name matches an identifier in the global scope, + * flag this as an error. We only do this for "D" enumerators to + * prevent "C" header file enumerators from conflicting with the ever- + * growing list of D built-in global variables and inlines. If a "C" + * enumerator conflicts with a global identifier, we add the enumerator + * but do not insert a corresponding inline (i.e. the D variable wins). 
+ */ + if (dt_idstack_lookup(&yypcb->pcb_globals, name) != NULL) { + if (dsp->ds_ctfp == dtp->dt_ddefs->dm_ctfp) { + xyerror(D_DECL_IDRED, + "identifier redeclared: %s\n", name); + } else + return; + } + + dt_dprintf("add global enumerator %s = %d\n", name, value); + + idp = dt_idhash_insert(dtp->dt_globals, name, DT_IDENT_ENUM, + DT_IDFLG_INLINE | DT_IDFLG_REF, 0, _dtrace_defattr, 0, + &dt_idops_inline, NULL, dtp->dt_gen); + + if (idp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + yyintprefix = 0; + yyintsuffix[0] = '\0'; + yyintdecimal = 0; + + dnp = dt_node_int(value); + dt_node_type_assign(dnp, dsp->ds_ctfp, dsp->ds_type, B_FALSE); + + if ((inp = malloc(sizeof (dt_idnode_t))) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * Remove the INT node from the node allocation list and store it in + * din_list and din_root so it persists with and is freed by the ident. + */ + assert(yypcb->pcb_list == dnp); + yypcb->pcb_list = dnp->dn_link; + dnp->dn_link = NULL; + + bzero(inp, sizeof (dt_idnode_t)); + inp->din_list = dnp; + inp->din_root = dnp; + + idp->di_iarg = inp; + idp->di_ctfp = dsp->ds_ctfp; + idp->di_type = dsp->ds_type; +} + +/* + * Look up the type corresponding to the specified decl stack. The scoping of + * the underlying type names is handled by dt_type_lookup(). We build up the + * name from the specified string and prefixes and then lookup the type. If + * we fail, an errmsg is saved and the caller must abort with EDT_COMPILER. + */ +int +dt_decl_type(dt_decl_t *ddp, dtrace_typeinfo_t *tip) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + dt_module_t *dmp; + ctf_arinfo_t r; + ctf_id_t type; + + char n[DT_TYPE_NAMELEN]; + uint_t flag; + char *name; + int rv; + + tip->dtt_flags = 0; + + /* + * Based on our current #include depth and decl stack depth, determine + * which dynamic CTF module and scope to use when adding any new types. + */ + dmp = yypcb->pcb_idepth ? dtp->dt_cdefs : dtp->dt_ddefs; + flag = yypcb->pcb_dstack.ds_next ? 
CTF_ADD_NONROOT : CTF_ADD_ROOT; + + if (ddp->dd_attr & DT_DA_USER) + tip->dtt_flags = DTT_FL_USER; + + /* + * If we have already cached a CTF type for this decl, then we just + * return the type information for the cached type. + */ + if (ddp->dd_ctfp != NULL && + (dmp = dt_module_lookup_by_ctf(dtp, ddp->dd_ctfp)) != NULL) { + tip->dtt_object = dmp->dm_name; + tip->dtt_ctfp = ddp->dd_ctfp; + tip->dtt_type = ddp->dd_type; + return (0); + } + + /* + * Currently CTF treats all function pointers identically. We cache a + * representative ID of kind CTF_K_FUNCTION and just return that type. + * If we want to support full function declarations, dd_next refers to + * the declaration of the function return type, and the parameter list + * should be parsed and hung off a new pointer inside of this decl. + */ + if (ddp->dd_kind == CTF_K_FUNCTION) { + tip->dtt_object = dtp->dt_ddefs->dm_name; + tip->dtt_ctfp = DT_FUNC_CTFP(dtp); + tip->dtt_type = DT_FUNC_TYPE(dtp); + return (0); + } + + /* + * If the decl is a pointer, resolve the rest of the stack by calling + * dt_decl_type() recursively and then compute a pointer to the result. + * Similar to the code above, we return a cached id for function ptrs. + */ + if (ddp->dd_kind == CTF_K_POINTER) { + if (ddp->dd_next->dd_kind == CTF_K_FUNCTION) { + tip->dtt_object = dtp->dt_ddefs->dm_name; + tip->dtt_ctfp = DT_FPTR_CTFP(dtp); + tip->dtt_type = DT_FPTR_TYPE(dtp); + return (0); + } + + if ((rv = dt_decl_type(ddp->dd_next, tip)) == 0 && + (rv = dt_type_pointer(tip)) != 0) { + xywarn(D_UNKNOWN, "cannot find type: %s*: %s\n", + dt_type_name(tip->dtt_ctfp, tip->dtt_type, + n, sizeof (n)), ctf_errmsg(dtp->dt_ctferr)); + } + + return (rv); + } + + /* + * If the decl is an array, we must find the base type and then call + * dt_decl_type() recursively and then build an array of the result. + * The C and D multi-dimensional array syntax requires that consecutive + * array declarations be processed from right-to-left (i.e. 
top-down + * from the perspective of the declaration stack). For example, an + * array declaration such as int x[3][5] is stored on the stack as: + * + * (bottom) NULL <- ( INT "int" ) <- ( ARR [3] ) <- ( ARR [5] ) (top) + * + * but means that x is declared to be an array of 3 objects each of + * which is an array of 5 integers, or in CTF representation: + * + * type T1:( content=int, nelems=5 ) type T2:( content=T1, nelems=3 ) + * + * For more details, refer to K&R[5.7] and ISO C 6.5.2.1. Rather than + * overcomplicate the implementation of dt_decl_type(), we push array + * declarations down into the stack in dt_decl_array(), above, so that + * by the time dt_decl_type() is called, the decl stack looks like: + * + * (bottom) NULL <- ( INT "int" ) <- ( ARR [5] ) <- ( ARR [3] ) (top) + * + * which permits a straightforward recursive descent of the decl stack + * to build the corresponding CTF type tree in the appropriate order. + */ + if (ddp->dd_kind == CTF_K_ARRAY) { + /* + * If the array decl has a parameter list associated with it, + * this is an associative array declaration: return <DYN>. + */ + if (ddp->dd_node != NULL && + ddp->dd_node->dn_kind == DT_NODE_TYPE) { + tip->dtt_object = dtp->dt_ddefs->dm_name; + tip->dtt_ctfp = DT_DYN_CTFP(dtp); + tip->dtt_type = DT_DYN_TYPE(dtp); + return (0); + } + + if ((rv = dt_decl_type(ddp->dd_next, tip)) != 0) + return (rv); + + /* + * If the array base type is not defined in the target + * container or its parent, copy the type to the target + * container and reset dtt_ctfp and dtt_type to the copy. 
+ */ + if (tip->dtt_ctfp != dmp->dm_ctfp && + tip->dtt_ctfp != ctf_parent_file(dmp->dm_ctfp)) { + + tip->dtt_type = ctf_add_type(dmp->dm_ctfp, + tip->dtt_ctfp, tip->dtt_type); + tip->dtt_ctfp = dmp->dm_ctfp; + + if (tip->dtt_type == CTF_ERR || + ctf_update(tip->dtt_ctfp) == CTF_ERR) { + xywarn(D_UNKNOWN, "failed to copy type: %s\n", + ctf_errmsg(ctf_errno(tip->dtt_ctfp))); + return (-1); + } + } + + /* + * The array index type is irrelevant in C and D: just set it + * to "long" for all array types that we create on-the-fly. + */ + r.ctr_contents = tip->dtt_type; + r.ctr_index = ctf_lookup_by_name(tip->dtt_ctfp, "long"); + r.ctr_nelems = ddp->dd_node ? + (uint_t)ddp->dd_node->dn_value : 0; + + tip->dtt_object = dmp->dm_name; + tip->dtt_ctfp = dmp->dm_ctfp; + tip->dtt_type = ctf_add_array(dmp->dm_ctfp, CTF_ADD_ROOT, &r); + + if (tip->dtt_type == CTF_ERR || + ctf_update(tip->dtt_ctfp) == CTF_ERR) { + xywarn(D_UNKNOWN, "failed to create array type: %s\n", + ctf_errmsg(ctf_errno(tip->dtt_ctfp))); + return (-1); + } + + return (0); + } + + /* + * Allocate space for the type name and enough space for the maximum + * additional text ("unsigned long long \0" requires 20 more bytes). + */ + name = alloca(ddp->dd_name ? 
strlen(ddp->dd_name) + 20 : 20); + name[0] = '\0'; + + switch (ddp->dd_kind) { + case CTF_K_INTEGER: + case CTF_K_FLOAT: + if (ddp->dd_attr & DT_DA_SIGNED) + (void) strcat(name, "signed "); + if (ddp->dd_attr & DT_DA_UNSIGNED) + (void) strcat(name, "unsigned "); + if (ddp->dd_attr & DT_DA_SHORT) + (void) strcat(name, "short "); + if (ddp->dd_attr & DT_DA_LONG) + (void) strcat(name, "long "); + if (ddp->dd_attr & DT_DA_LONGLONG) + (void) strcat(name, "long long "); + if (ddp->dd_attr == 0 && ddp->dd_name == NULL) + (void) strcat(name, "int"); + break; + case CTF_K_STRUCT: + (void) strcpy(name, "struct "); + break; + case CTF_K_UNION: + (void) strcpy(name, "union "); + break; + case CTF_K_ENUM: + (void) strcpy(name, "enum "); + break; + case CTF_K_TYPEDEF: + break; + default: + xywarn(D_UNKNOWN, "internal error -- " + "bad decl kind %u\n", ddp->dd_kind); + return (-1); + } + + /* + * Add dd_name unless a short, long, or long long is explicitly + * suffixed by int. We use the C/CTF canonical names for integers. + */ + if (ddp->dd_name != NULL && (ddp->dd_kind != CTF_K_INTEGER || + (ddp->dd_attr & (DT_DA_SHORT | DT_DA_LONG | DT_DA_LONGLONG)) == 0)) + (void) strcat(name, ddp->dd_name); + + /* + * Lookup the type. If we find it, we're done. Otherwise create a + * forward tag for the type if it is a struct, union, or enum. If + * we can't find it and we can't create a tag, return failure. 
	 */
	if ((rv = dt_type_lookup(name, tip)) == 0)
		return (rv);

	switch (ddp->dd_kind) {
	case CTF_K_STRUCT:
	case CTF_K_UNION:
	case CTF_K_ENUM:
		type = ctf_add_forward(dmp->dm_ctfp, flag,
		    ddp->dd_name, ddp->dd_kind);
		break;
	default:
		xywarn(D_UNKNOWN, "failed to resolve type %s: %s\n", name,
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		return (rv);
	}

	if (type == CTF_ERR || ctf_update(dmp->dm_ctfp) == CTF_ERR) {
		xywarn(D_UNKNOWN, "failed to add forward tag for %s: %s\n",
		    name, ctf_errmsg(ctf_errno(dmp->dm_ctfp)));
		return (-1);
	}

	ddp->dd_ctfp = dmp->dm_ctfp;
	ddp->dd_type = type;

	tip->dtt_object = dmp->dm_name;
	tip->dtt_ctfp = dmp->dm_ctfp;
	tip->dtt_type = type;

	return (0);
}

/*
 * Put a scope into its empty state: no declarations, no successor, no
 * identifier, no CTF container, type CTF_ERR, default storage class, and an
 * enumerator value of -1.  Nothing previously referenced by the scope is
 * freed here.
 */
void
dt_scope_create(dt_scope_t *dsp)
{
	dsp->ds_decl = NULL;
	dsp->ds_next = NULL;
	dsp->ds_ident = NULL;
	dsp->ds_ctfp = NULL;
	dsp->ds_type = CTF_ERR;
	dsp->ds_class = DT_DC_DEFAULT;
	dsp->ds_enumval = -1;
}

/*
 * Walk the ds_next chain starting at dsp, releasing each scope's declaration
 * and identifier.  The scope structure itself is freed as well, except for
 * the one embedded in the parser control block (yypcb->pcb_dstack), which
 * was not obtained from malloc().
 */
void
dt_scope_destroy(dt_scope_t *dsp)
{
	dt_scope_t *nsp;

	for (; dsp != NULL; dsp = nsp) {
		dt_decl_free(dsp->ds_decl);
		free(dsp->ds_ident);
		nsp = dsp->ds_next;	/* read the link before dsp is freed */
		if (dsp != &yypcb->pcb_dstack)
			free(dsp);
	}
}

/*
 * Save the current state of the root scope (yypcb->pcb_dstack) in a newly
 * allocated scope, tag that saved copy with the given CTF container and type,
 * then reset the root scope and link the saved copy behind it.  If the
 * allocation fails, control leaves via longjmp() with EDT_NOMEM.
 */
void
dt_scope_push(ctf_file_t *ctfp, ctf_id_t type)
{
	dt_scope_t *rsp = &yypcb->pcb_dstack;
	dt_scope_t *dsp = malloc(sizeof (dt_scope_t));

	if (dsp == NULL)
		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

	dsp->ds_decl = rsp->ds_decl;
	dsp->ds_next = rsp->ds_next;
	dsp->ds_ident = rsp->ds_ident;
	dsp->ds_ctfp = ctfp;
	dsp->ds_type = type;
	dsp->ds_class = rsp->ds_class;
	dsp->ds_enumval = rsp->ds_enumval;

	dt_scope_create(rsp);
	rsp->ds_next = dsp;
}

/*
 * Undo the most recent dt_scope_push(): discard whatever the root scope
 * currently holds, copy the saved scope back into the root, free the saved
 * scope, and return the restored top-of-stack declaration.  If there is no
 * saved scope, control leaves via longjmp() with EDT_NOSCOPE.
 */
dt_decl_t *
dt_scope_pop(void)
{
	dt_scope_t *rsp = &yypcb->pcb_dstack;
	dt_scope_t *dsp = rsp->ds_next;

	if (dsp == NULL)
		longjmp(yypcb->pcb_jmpbuf, EDT_NOSCOPE);

	/* Commit pending CTF changes for the container saved with the scope. */
	if (dsp->ds_ctfp != NULL && ctf_update(dsp->ds_ctfp) == CTF_ERR) {
		xyerror(D_UNKNOWN, "failed to update type definitions: %s\n",
		    ctf_errmsg(ctf_errno(dsp->ds_ctfp)));
	}

	dt_decl_free(rsp->ds_decl);
	free(rsp->ds_ident);

	rsp->ds_decl = dsp->ds_decl;
	rsp->ds_next = dsp->ds_next;
	rsp->ds_ident = dsp->ds_ident;
	rsp->ds_ctfp = dsp->ds_ctfp;
	rsp->ds_type = dsp->ds_type;
	rsp->ds_class = dsp->ds_class;
	rsp->ds_enumval = dsp->ds_enumval;

	free(dsp);
	return (rsp->ds_decl);
}
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_decl.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_decl.h
new file mode 100644
index 0000000..d322875
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_decl.h
@@ -0,0 +1,129 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2013 Joyent, Inc. All rights reserved.
 */

#ifndef _DT_DECL_H
#define _DT_DECL_H

#include <sys/types.h>
#include <libctf.h>
#include <dtrace.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

struct dt_node; /* forward declaration of dt_node_t */

/*
 * One element of a declaration: its kind, attribute flags, resolved CTF
 * type, optional name, optional node, and a link to the next element.
 */
typedef struct dt_decl {
	ushort_t dd_kind; /* declaration kind (CTF_K_* kind) */
	ushort_t dd_attr; /* attributes (DT_DA_* flags) */
	ctf_file_t *dd_ctfp; /* CTF container for decl's type */
	ctf_id_t dd_type; /* CTF identifier for decl's type */
	char *dd_name; /* string name of this decl (or NULL) */
	struct dt_node *dd_node; /* node for array size or parm list */
	struct dt_decl *dd_next; /* next declaration in list */
} dt_decl_t;

/* Bit flags stored in dt_decl_t's dd_attr member. */
#define DT_DA_SIGNED 0x0001 /* signed integer value */
#define DT_DA_UNSIGNED 0x0002 /* unsigned integer value */
#define DT_DA_SHORT 0x0004 /* short integer value */
#define DT_DA_LONG 0x0008 /* long integer or double */
#define DT_DA_LONGLONG 0x0010 /* long long integer value */
#define DT_DA_CONST 0x0020 /* qualify type as const */
#define DT_DA_RESTRICT 0x0040 /* qualify type as restrict */
#define DT_DA_VOLATILE 0x0080 /* qualify type as volatile */
#define DT_DA_PAREN 0x0100 /* parenthesis tag */
#define DT_DA_USER 0x0200 /* user-land type specifier */

typedef enum dt_dclass {
	DT_DC_DEFAULT, /* no storage class specified */
	DT_DC_AUTO, /* automatic storage */
	DT_DC_REGISTER, /* register storage */
	DT_DC_STATIC, /* static storage */
	DT_DC_EXTERN, /* extern storage */
	DT_DC_TYPEDEF, /* type definition */
	DT_DC_SELF, /* thread-local storage */
	DT_DC_THIS /* clause-local storage */
} dt_dclass_t;

/*
 * Declaration scope.  Scopes are chained through ds_next; see
 * dt_scope_push() and dt_scope_pop().
 */
typedef struct dt_scope {
	dt_decl_t *ds_decl; /* pointer to top of decl stack */
	struct dt_scope *ds_next; /* pointer to next scope */
	char *ds_ident; /* identifier for this scope (if any) */
	ctf_file_t *ds_ctfp; /* CTF container for this scope */
	ctf_id_t ds_type; /* CTF id of enclosing type */
	dt_dclass_t ds_class; /* declaration class for this scope */
	int ds_enumval; /* most recent enumerator value */
} dt_scope_t;

extern dt_decl_t *dt_decl_alloc(ushort_t, char *);
extern void dt_decl_free(dt_decl_t *);
extern void dt_decl_reset(void);
extern dt_decl_t *dt_decl_push(dt_decl_t *);
extern dt_decl_t *dt_decl_pop(void);
extern dt_decl_t *dt_decl_pop_param(char **);
extern dt_decl_t *dt_decl_top(void);

extern dt_decl_t *dt_decl_ident(char *);
extern void dt_decl_class(dt_dclass_t);

/* Flags for the final (uint_t) argument of dt_decl_prototype(). */
#define DT_DP_VARARGS 0x1 /* permit varargs in prototype */
#define DT_DP_DYNAMIC 0x2 /* permit dynamic type in prototype */
#define DT_DP_VOID 0x4 /* permit void type in prototype */
#define DT_DP_ANON 0x8 /* permit anonymous parameters */

extern int dt_decl_prototype(struct dt_node *, struct dt_node *,
    const char *, uint_t);

extern dt_decl_t *dt_decl_spec(ushort_t, char *);
extern dt_decl_t *dt_decl_attr(ushort_t);
extern dt_decl_t *dt_decl_array(struct dt_node *);
extern dt_decl_t *dt_decl_func(dt_decl_t *, struct dt_node *);
extern dt_decl_t *dt_decl_ptr(void);

extern dt_decl_t *dt_decl_sou(uint_t, char *);
extern void dt_decl_member(struct dt_node *);

extern dt_decl_t *dt_decl_enum(char *);
extern void dt_decl_enumerator(char *, struct dt_node *);

extern int dt_decl_type(dt_decl_t *, dtrace_typeinfo_t *);

extern void dt_scope_create(dt_scope_t *);
extern void dt_scope_destroy(dt_scope_t *);
extern void dt_scope_push(ctf_file_t *, ctf_id_t);
extern dt_decl_t *dt_scope_pop(void);

#ifdef __cplusplus
}
#endif

#endif /* _DT_DECL_H */
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dis.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dis.c
new file mode 100644
index 0000000..c0af364
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dis.c
@@ -0,0 +1,526 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. + */ + +#include <strings.h> +#include <stdio.h> + +#include <dt_impl.h> +#include <dt_ident.h> + +/*ARGSUSED*/ +static void +dt_dis_log(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp) +{ + (void) fprintf(fp, "%-4s %%r%u, %%r%u, %%r%u", name, + DIF_INSTR_R1(in), DIF_INSTR_R2(in), DIF_INSTR_RD(in)); +} + +/*ARGSUSED*/ +static void +dt_dis_branch(const dtrace_difo_t *dp, const char *name, + dif_instr_t in, FILE *fp) +{ + (void) fprintf(fp, "%-4s %u", name, DIF_INSTR_LABEL(in)); +} + +/*ARGSUSED*/ +static void +dt_dis_load(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp) +{ + (void) fprintf(fp, "%-4s [%%r%u], %%r%u", name, + DIF_INSTR_R1(in), DIF_INSTR_RD(in)); +} + +/*ARGSUSED*/ +static void +dt_dis_store(const dtrace_difo_t *dp, const char *name, + dif_instr_t in, FILE *fp) +{ + (void) fprintf(fp, "%-4s %%r%u, [%%r%u]", name, + DIF_INSTR_R1(in), DIF_INSTR_RD(in)); +} + +/*ARGSUSED*/ +static void +dt_dis_str(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp) +{ + (void) fprintf(fp, "%s", name); +} + +/*ARGSUSED*/ 
/*
 * Print a two-register instruction as "name %r1, %rd".
 */
static void
dt_dis_r1rd(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	(void) fprintf(fp, "%-4s %%r%u, %%r%u", name,
	    DIF_INSTR_R1(in), DIF_INSTR_RD(in));
}

/*
 * Print a compare instruction as "name %r1, %r2".
 */
/*ARGSUSED*/
static void
dt_dis_cmp(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	(void) fprintf(fp, "%-4s %%r%u, %%r%u", name,
	    DIF_INSTR_R1(in), DIF_INSTR_R2(in));
}

/*
 * Print a test instruction as "name %r1".
 */
/*ARGSUSED*/
static void
dt_dis_tst(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	(void) fprintf(fp, "%-4s %%r%u", name, DIF_INSTR_R1(in));
}

/*
 * Search the DIFO variable table for the entry matching both id and scope
 * and return a pointer to its name in the DIFO string table.  Returns NULL
 * if no entry matches, or if the first matching entry's name offset lies
 * outside the string table.
 */
static const char *
dt_dis_varname(const dtrace_difo_t *dp, uint_t id, uint_t scope)
{
	const dtrace_difv_t *dvp = dp->dtdo_vartab;
	uint_t i;

	for (i = 0; i < dp->dtdo_varlen; i++, dvp++) {
		if (dvp->dtdv_id == id && dvp->dtdv_scope == scope) {
			if (dvp->dtdv_name < dp->dtdo_strlen)
				return (dp->dtdo_strtab + dvp->dtdv_name);
			break;
		}
	}

	return (NULL);
}

/*
 * Derive a DIFV_SCOPE_* value from the third character of an instruction
 * mnemonic ('l', 't' or 'g'); any other character yields -1u.  The caller
 * must pass a name of at least three characters.
 */
static uint_t
dt_dis_scope(const char *name)
{
	switch (name[2]) {
	case 'l': return (DIFV_SCOPE_LOCAL);
	case 't': return (DIFV_SCOPE_THREAD);
	case 'g': return (DIFV_SCOPE_GLOBAL);
	default: return (-1u);
	}
}

/*
 * Print an array-load instruction as "name DT_VAR(id), %r2, %rd", followed
 * by the variable's name as a trailing "!" note when it can be resolved.
 */
static void
dt_dis_lda(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	uint_t var = DIF_INSTR_R1(in);
	const char *vname;

	(void) fprintf(fp, "%-4s DT_VAR(%u), %%r%u, %%r%u", name,
	    var, DIF_INSTR_R2(in), DIF_INSTR_RD(in));

	if ((vname = dt_dis_varname(dp, var, dt_dis_scope(name))) != NULL)
		(void) fprintf(fp, "\t\t! DT_VAR(%u) = \"%s\"", var, vname);
}

/*
 * Print a variable-load instruction as "name DT_VAR(id), %rd", followed by
 * the variable's name as a trailing "!" note when it can be resolved.
 */
static void
dt_dis_ldv(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	uint_t var = DIF_INSTR_VAR(in);
	const char *vname;

	(void) fprintf(fp, "%-4s DT_VAR(%u), %%r%u",
	    name, var, DIF_INSTR_RD(in));

	if ((vname = dt_dis_varname(dp, var, dt_dis_scope(name))) != NULL)
		(void) fprintf(fp, "\t\t! DT_VAR(%u) = \"%s\"", var, vname);
}

/*
 * Print a variable-store instruction as "name %rs, DT_VAR(id)", followed by
 * the variable's name as a trailing "!" note when it can be resolved.
 */
static void
dt_dis_stv(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	uint_t var = DIF_INSTR_VAR(in);
	const char *vname;

	(void) fprintf(fp, "%-4s %%r%u, DT_VAR(%u)",
	    name, DIF_INSTR_RS(in), var);

	if ((vname = dt_dis_varname(dp, var, dt_dis_scope(name))) != NULL)
		(void) fprintf(fp, "\t\t! DT_VAR(%u) = \"%s\"", var, vname);
}

/*
 * Print a set-integer instruction as "name DT_INTEGER[idx], %rd".  When idx
 * is within the DIFO integer table, the referenced value is appended in hex.
 */
static void
dt_dis_setx(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	uint_t intptr = DIF_INSTR_INTEGER(in);

	(void) fprintf(fp, "%-4s DT_INTEGER[%u], %%r%u", name,
	    intptr, DIF_INSTR_RD(in));

	if (intptr < dp->dtdo_intlen) {
		(void) fprintf(fp, "\t\t! 0x%llx",
		    (u_longlong_t)dp->dtdo_inttab[intptr]);
	}
}

/*
 * Print a set-string instruction as "name DT_STRING[off], %rd".  When off is
 * within the DIFO string table, the referenced string is appended in quotes.
 */
static void
dt_dis_sets(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	uint_t strptr = DIF_INSTR_STRING(in);

	(void) fprintf(fp, "%-4s DT_STRING[%u], %%r%u", name,
	    strptr, DIF_INSTR_RD(in));

	if (strptr < dp->dtdo_strlen)
		(void) fprintf(fp, "\t\t! \"%s\"", dp->dtdo_strtab + strptr);
}

/*
 * Print a return instruction as "name %rd".
 */
/*ARGSUSED*/
static void
dt_dis_ret(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	(void) fprintf(fp, "%-4s %%r%u", name, DIF_INSTR_RD(in));
}

/*
 * Print a subroutine call as "name DIF_SUBR(n), %rd" with the string that
 * dtrace_subrstr() returns for n as a trailing "!" note.
 */
/*ARGSUSED*/
static void
dt_dis_call(const dtrace_difo_t *dp, const char *name, dif_instr_t in, FILE *fp)
{
	uint_t subr = DIF_INSTR_SUBR(in);

	(void) fprintf(fp, "%-4s DIF_SUBR(%u), %%r%u\t\t! %s",
	    name, subr, DIF_INSTR_RD(in), dtrace_subrstr(NULL, subr));
}

/*
 * Print a tuple-stack push.  DIF_OP_PUSHTV is shown with one register
 * operand; any other opcode routed here is shown with two.  A type value
 * of 0 or 1 is additionally decoded via tnames[].
 */
/*ARGSUSED*/
static void
dt_dis_pushts(const dtrace_difo_t *dp,
    const char *name, dif_instr_t in, FILE *fp)
{
	static const char *const tnames[] = { "D type", "string" };
	uint_t type = DIF_INSTR_TYPE(in);
	const char *pad;

	if (DIF_INSTR_OP(in) == DIF_OP_PUSHTV) {
		(void) fprintf(fp, "%-4s DT_TYPE(%u), %%r%u",
		    name, type, DIF_INSTR_RS(in));
		pad = "\t\t";
	} else {
		(void) fprintf(fp, "%-4s DT_TYPE(%u), %%r%u, %%r%u",
		    name, type, DIF_INSTR_R2(in), DIF_INSTR_RS(in));
		pad = "\t";
	}

	if (type < sizeof (tnames) / sizeof (tnames[0])) {
		(void) fprintf(fp, "%s! DT_TYPE(%u) = %s", pad,
		    type, tnames[type]);
	}
}

/*
 * Print a translator instruction as "name DT_XLREF[idx], %rd".  When idx is
 * within the DIFO translator member table, the translator id and member
 * name it refers to are appended.
 */
static void
dt_dis_xlate(const dtrace_difo_t *dp,
    const char *name, dif_instr_t in, FILE *fp)
{
	uint_t xlr = DIF_INSTR_XLREF(in);

	(void) fprintf(fp, "%-4s DT_XLREF[%u], %%r%u",
	    name, xlr, DIF_INSTR_RD(in));

	if (xlr < dp->dtdo_xlmlen) {
		(void) fprintf(fp, "\t\t! DT_XLREF[%u] = %u.%s", xlr,
		    (uint_t)dp->dtdo_xlmtab[xlr]->dn_membexpr->dn_xlator->dx_id,
		    dp->dtdo_xlmtab[xlr]->dn_membname);
	}
}

/*
 * Format a human-readable description of a DIF type into buf (at most len
 * bytes, via snprintf) and return buf.  Kind and CTF kind values without a
 * name in the switches below are rendered in hex.
 */
static char *
dt_dis_typestr(const dtrace_diftype_t *t, char *buf, size_t len)
{
	char kind[16], ckind[16];

	switch (t->dtdt_kind) {
	case DIF_TYPE_CTF:
		(void) strcpy(kind, "D type");
		break;
	case DIF_TYPE_STRING:
		(void) strcpy(kind, "string");
		break;
	default:
		(void) snprintf(kind, sizeof (kind), "0x%x", t->dtdt_kind);
	}

	switch (t->dtdt_ckind) {
	case CTF_K_UNKNOWN:
		(void) strcpy(ckind, "unknown");
		break;
	case CTF_K_INTEGER:
		(void) strcpy(ckind, "integer");
		break;
	case CTF_K_FLOAT:
		(void) strcpy(ckind, "float");
		break;
	case CTF_K_POINTER:
		(void) strcpy(ckind, "pointer");
		break;
	case CTF_K_ARRAY:
		(void) strcpy(ckind, "array");
		break;
	case CTF_K_FUNCTION:
		(void) strcpy(ckind, "function");
		break;
	case CTF_K_STRUCT:
		(void) strcpy(ckind, "struct");
		break;
	case CTF_K_UNION:
		(void) strcpy(ckind, "union");
		break;
	case CTF_K_ENUM:
		(void) strcpy(ckind, "enum");
		break;
	case CTF_K_FORWARD:
		(void) strcpy(ckind, "forward");
		break;
	case CTF_K_TYPEDEF:
		(void) strcpy(ckind, "typedef");
		break;
	case CTF_K_VOLATILE:
		(void) strcpy(ckind, "volatile");
		break;
	case CTF_K_CONST:
		(void) strcpy(ckind, "const");
		break;
	case CTF_K_RESTRICT:
		(void) strcpy(ckind, "restrict");
		break;
	default:
		(void) snprintf(ckind, sizeof (ckind), "0x%x", t->dtdt_ckind);
	}

	if (t->dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) {
		(void) snprintf(buf, len, "%s (%s) by %sref (size %lu)",
		    kind, ckind, (t->dtdt_flags & DIF_TF_BYUREF) ? "user " : "",
		    (ulong_t)t->dtdt_size);
	} else {
		(void) snprintf(buf, len, "%s (%s) (size %lu)",
		    kind, ckind, (ulong_t)t->dtdt_size);
	}

	return (buf);
}

/*
 * Print a relocation table: a header line whose first column is rtag, then
 * one line per entry giving its type, offset, data and name.
 * NOTE(review): dofr_name is used to index dtdo_strtab without the bounds
 * check that dt_dis_varname() applies -- confirm callers guarantee validity.
 */
static void
dt_dis_rtab(const char *rtag, const dtrace_difo_t *dp, FILE *fp,
    const dof_relodesc_t *rp, uint32_t len)
{
	(void) fprintf(fp, "\n%-4s %-8s %-8s %s\n",
	    rtag, "OFFSET", "DATA", "NAME");

	for (; len != 0; len--, rp++) {
		(void) fprintf(fp, "%-4u %-8llu %-8llu %s\n",
		    rp->dofr_type, (u_longlong_t)rp->dofr_offset,
		    (u_longlong_t)rp->dofr_data,
		    &dp->dtdo_strtab[rp->dofr_name]);
	}
}

/*
 * Write a textual disassembly of the DIFO dp to fp: the return type, every
 * instruction, and then (each only when non-empty) the variable table, the
 * translator member table, and the kernel and user relocation tables.
 */
void
dt_dis(const dtrace_difo_t *dp, FILE *fp)
{
	/*
	 * Table indexed by opcode giving each instruction's mnemonic and the
	 * routine that prints its operands.  Entry 0 doubles as the fallback
	 * for opcodes beyond the end of the table.
	 */
	static const struct opent {
		const char *op_name;
		void (*op_func)(const dtrace_difo_t *, const char *,
		    dif_instr_t, FILE *);
	} optab[] = {
		{ "(illegal opcode)", dt_dis_str },
		{ "or", dt_dis_log }, /* DIF_OP_OR */
		{ "xor", dt_dis_log }, /* DIF_OP_XOR */
		{ "and", dt_dis_log }, /* DIF_OP_AND */
		{ "sll", dt_dis_log }, /* DIF_OP_SLL */
		{ "srl", dt_dis_log }, /* DIF_OP_SRL */
		{ "sub", dt_dis_log }, /* DIF_OP_SUB */
		{ "add", dt_dis_log }, /* DIF_OP_ADD */
		{ "mul", dt_dis_log }, /* DIF_OP_MUL */
		{ "sdiv", dt_dis_log }, /* DIF_OP_SDIV */
		{ "udiv", dt_dis_log }, /* DIF_OP_UDIV */
		{ "srem", dt_dis_log }, /* DIF_OP_SREM */
		{ "urem", dt_dis_log }, /* DIF_OP_UREM */
		{ "not", dt_dis_r1rd }, /* DIF_OP_NOT */
		{ "mov", dt_dis_r1rd }, /* DIF_OP_MOV */
		{ "cmp", dt_dis_cmp }, /* DIF_OP_CMP */
		{ "tst", dt_dis_tst }, /* DIF_OP_TST */
		{ "ba", dt_dis_branch }, /* DIF_OP_BA */
		{ "be", dt_dis_branch }, /* DIF_OP_BE */
		{ "bne", dt_dis_branch }, /* DIF_OP_BNE */
		{ "bg", dt_dis_branch }, /* DIF_OP_BG */
		{ "bgu", dt_dis_branch }, /* DIF_OP_BGU */
		{ "bge", dt_dis_branch }, /* DIF_OP_BGE */
		{ "bgeu", dt_dis_branch }, /* DIF_OP_BGEU */
		{ "bl", dt_dis_branch }, /* DIF_OP_BL */
		{ "blu", dt_dis_branch }, /* DIF_OP_BLU */
		{ "ble", dt_dis_branch }, /* DIF_OP_BLE */
		{ "bleu", dt_dis_branch }, /* DIF_OP_BLEU */
		{ "ldsb", dt_dis_load }, /* DIF_OP_LDSB */
		{ "ldsh", dt_dis_load }, /* DIF_OP_LDSH */
		{ "ldsw", dt_dis_load }, /* DIF_OP_LDSW */
		{ "ldub", dt_dis_load }, /* DIF_OP_LDUB */
		{ "lduh", dt_dis_load }, /* DIF_OP_LDUH */
		{ "lduw", dt_dis_load }, /* DIF_OP_LDUW */
		{ "ldx", dt_dis_load }, /* DIF_OP_LDX */
		{ "ret", dt_dis_ret }, /* DIF_OP_RET */
		{ "nop", dt_dis_str }, /* DIF_OP_NOP */
		{ "setx", dt_dis_setx }, /* DIF_OP_SETX */
		{ "sets", dt_dis_sets }, /* DIF_OP_SETS */
		{ "scmp", dt_dis_cmp }, /* DIF_OP_SCMP */
		{ "ldga", dt_dis_lda }, /* DIF_OP_LDGA */
		{ "ldgs", dt_dis_ldv }, /* DIF_OP_LDGS */
		{ "stgs", dt_dis_stv }, /* DIF_OP_STGS */
		{ "ldta", dt_dis_lda }, /* DIF_OP_LDTA */
		{ "ldts", dt_dis_ldv }, /* DIF_OP_LDTS */
		{ "stts", dt_dis_stv }, /* DIF_OP_STTS */
		{ "sra", dt_dis_log }, /* DIF_OP_SRA */
		{ "call", dt_dis_call }, /* DIF_OP_CALL */
		{ "pushtr", dt_dis_pushts }, /* DIF_OP_PUSHTR */
		{ "pushtv", dt_dis_pushts }, /* DIF_OP_PUSHTV */
		{ "popts", dt_dis_str }, /* DIF_OP_POPTS */
		{ "flushts", dt_dis_str }, /* DIF_OP_FLUSHTS */
		{ "ldgaa", dt_dis_ldv }, /* DIF_OP_LDGAA */
		{ "ldtaa", dt_dis_ldv }, /* DIF_OP_LDTAA */
		{ "stgaa", dt_dis_stv }, /* DIF_OP_STGAA */
		{ "sttaa", dt_dis_stv }, /* DIF_OP_STTAA */
		{ "ldls", dt_dis_ldv }, /* DIF_OP_LDLS */
		{ "stls", dt_dis_stv }, /* DIF_OP_STLS */
		{ "allocs", dt_dis_r1rd }, /* DIF_OP_ALLOCS */
		{ "copys", dt_dis_log }, /* DIF_OP_COPYS */
		{ "stb", dt_dis_store }, /* DIF_OP_STB */
		{ "sth", dt_dis_store }, /* DIF_OP_STH */
		{ "stw", dt_dis_store }, /* DIF_OP_STW */
		{ "stx", dt_dis_store }, /* DIF_OP_STX */
		{ "uldsb", dt_dis_load }, /* DIF_OP_ULDSB */
		{ "uldsh", dt_dis_load }, /* DIF_OP_ULDSH */
		{ "uldsw", dt_dis_load }, /* DIF_OP_ULDSW */
		{ "uldub", dt_dis_load }, /* DIF_OP_ULDUB */
		{ "ulduh", dt_dis_load }, /* DIF_OP_ULDUH */
		{ "ulduw", dt_dis_load }, /* DIF_OP_ULDUW */
		{ "uldx", dt_dis_load }, /* DIF_OP_ULDX */
		{ "rldsb", dt_dis_load }, /* DIF_OP_RLDSB */
		{ "rldsh", dt_dis_load }, /* DIF_OP_RLDSH */
		{ "rldsw", dt_dis_load }, /* DIF_OP_RLDSW */
		{ "rldub", dt_dis_load }, /* DIF_OP_RLDUB */
		{ "rlduh", dt_dis_load }, /* DIF_OP_RLDUH */
		{ "rlduw", dt_dis_load }, /* DIF_OP_RLDUW */
		{ "rldx", dt_dis_load }, /* DIF_OP_RLDX */
		{ "xlate", dt_dis_xlate }, /* DIF_OP_XLATE */
		{ "xlarg", dt_dis_xlate }, /* DIF_OP_XLARG */
	};

	const struct opent *op;
	ulong_t i = 0;
	char type[DT_TYPE_NAMELEN];

	(void) fprintf(fp, "\nDIFO 0x%p returns %s\n", (void *)dp,
	    dt_dis_typestr(&dp->dtdo_rtype, type, sizeof (type)));

	(void) fprintf(fp, "%-3s %-8s %s\n",
	    "OFF", "OPCODE", "INSTRUCTION");

	for (i = 0; i < dp->dtdo_len; i++) {
		dif_instr_t instr = dp->dtdo_buf[i];
		dif_instr_t opcode = DIF_INSTR_OP(instr);

		if (opcode >= sizeof (optab) / sizeof (optab[0]))
			opcode = 0; /* force invalid opcode message */

		op = &optab[opcode];
		(void) fprintf(fp, "%02lu: %08x ", i, instr);
		op->op_func(dp, op->op_name, instr, fp);
		(void) fprintf(fp, "\n");
	}

	if (dp->dtdo_varlen != 0) {
		(void) fprintf(fp, "\n%-16s %-4s %-3s %-3s %-4s %s\n",
		    "NAME", "ID", "KND", "SCP", "FLAG", "TYPE");
	}

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		char kind[4], scope[4], flags[16] = { 0 };

		switch (v->dtdv_kind) {
		case DIFV_KIND_ARRAY:
			(void) strcpy(kind, "arr");
			break;
		case DIFV_KIND_SCALAR:
			(void) strcpy(kind, "scl");
			break;
		default:
			(void) snprintf(kind, sizeof (kind),
			    "%u", v->dtdv_kind);
		}

		switch (v->dtdv_scope) {
		case DIFV_SCOPE_GLOBAL:
			(void) strcpy(scope, "glb");
			break;
		case DIFV_SCOPE_THREAD:
			(void) strcpy(scope, "tls");
			break;
		case DIFV_SCOPE_LOCAL:
			(void) strcpy(scope, "loc");
			break;
		default:
			(void) snprintf(scope, sizeof (scope),
			    "%u", v->dtdv_scope);
		}

		/* Any flag bits other than REF and MOD are shown in hex. */
		if (v->dtdv_flags & ~(DIFV_F_REF | DIFV_F_MOD)) {
			(void) snprintf(flags, sizeof (flags), "/0x%x",
			    v->dtdv_flags & ~(DIFV_F_REF | DIFV_F_MOD));
		}

		if (v->dtdv_flags & DIFV_F_REF)
			(void) strcat(flags, "/r");
		if (v->dtdv_flags & DIFV_F_MOD)
			(void) strcat(flags, "/w");

		/*
		 * Each flag is stored with a leading '/', so "flags + 1"
		 * skips the first separator.  With no flags set the buffer
		 * is all zeroes and flags + 1 is still an empty string.
		 */
		(void) fprintf(fp, "%-16s %-4x %-3s %-3s %-4s %s\n",
		    &dp->dtdo_strtab[v->dtdv_name],
		    v->dtdv_id, kind, scope, flags + 1,
		    dt_dis_typestr(&v->dtdv_type, type, sizeof (type)));
	}

	if (dp->dtdo_xlmlen != 0) {
		(void) fprintf(fp, "\n%-4s %-3s %-12s %s\n",
		    "XLID", "ARG", "MEMBER", "TYPE");
	}

	for (i = 0; i < dp->dtdo_xlmlen; i++) {
		dt_node_t *dnp = dp->dtdo_xlmtab[i];
		dt_xlator_t *dxp = dnp->dn_membexpr->dn_xlator;
		(void) fprintf(fp, "%-4u %-3d %-12s %s\n",
		    (uint_t)dxp->dx_id, dxp->dx_arg, dnp->dn_membname,
		    dt_node_type_name(dnp, type, sizeof (type)));
	}

	if (dp->dtdo_krelen != 0)
		dt_dis_rtab("KREL", dp, fp, dp->dtdo_kreltab, dp->dtdo_krelen);

	if (dp->dtdo_urelen != 0)
		dt_dis_rtab("UREL", dp, fp, dp->dtdo_ureltab, dp->dtdo_urelen);
}
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.c
new file mode 100644
index 0000000..0b531c5
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.c
@@ -0,0 +1,986 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#include <sys/types.h> +#if defined(sun) +#include <sys/sysmacros.h> +#endif + +#include <strings.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <assert.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> + +#include <dt_impl.h> +#include <dt_strtab.h> +#include <dt_program.h> +#include <dt_provider.h> +#include <dt_xlator.h> +#include <dt_dof.h> + +void +dt_dof_init(dtrace_hdl_t *dtp) +{ + dt_dof_t *ddo = &dtp->dt_dof; + + ddo->ddo_hdl = dtp; + ddo->ddo_nsecs = 0; + ddo->ddo_strsec = DOF_SECIDX_NONE; + ddo->ddo_xlimport = NULL; + ddo->ddo_xlexport = NULL; + + dt_buf_create(dtp, &ddo->ddo_secs, "section headers", 0); + dt_buf_create(dtp, &ddo->ddo_strs, "string table", 0); + dt_buf_create(dtp, &ddo->ddo_ldata, "loadable data", 0); + dt_buf_create(dtp, &ddo->ddo_udata, "unloadable data", 0); + + dt_buf_create(dtp, &ddo->ddo_probes, "probe data", 0); + dt_buf_create(dtp, &ddo->ddo_args, "probe args", 0); + dt_buf_create(dtp, &ddo->ddo_offs, "probe offs", 0); + dt_buf_create(dtp, &ddo->ddo_enoffs, "probe is-enabled offs", 0); + dt_buf_create(dtp, &ddo->ddo_rels, 
"probe rels", 0); + + dt_buf_create(dtp, &ddo->ddo_xlms, "xlate members", 0); +} + +void +dt_dof_fini(dtrace_hdl_t *dtp) +{ + dt_dof_t *ddo = &dtp->dt_dof; + + dt_free(dtp, ddo->ddo_xlimport); + dt_free(dtp, ddo->ddo_xlexport); + + dt_buf_destroy(dtp, &ddo->ddo_secs); + dt_buf_destroy(dtp, &ddo->ddo_strs); + dt_buf_destroy(dtp, &ddo->ddo_ldata); + dt_buf_destroy(dtp, &ddo->ddo_udata); + + dt_buf_destroy(dtp, &ddo->ddo_probes); + dt_buf_destroy(dtp, &ddo->ddo_args); + dt_buf_destroy(dtp, &ddo->ddo_offs); + dt_buf_destroy(dtp, &ddo->ddo_enoffs); + dt_buf_destroy(dtp, &ddo->ddo_rels); + + dt_buf_destroy(dtp, &ddo->ddo_xlms); +} + +static int +dt_dof_reset(dtrace_hdl_t *dtp, dtrace_prog_t *pgp) +{ + dt_dof_t *ddo = &dtp->dt_dof; + uint_t i, nx = dtp->dt_xlatorid; + + assert(ddo->ddo_hdl == dtp); + ddo->ddo_pgp = pgp; + + ddo->ddo_nsecs = 0; + ddo->ddo_strsec = DOF_SECIDX_NONE; + + dt_free(dtp, ddo->ddo_xlimport); + dt_free(dtp, ddo->ddo_xlexport); + + ddo->ddo_xlimport = dt_alloc(dtp, sizeof (dof_secidx_t) * nx); + ddo->ddo_xlexport = dt_alloc(dtp, sizeof (dof_secidx_t) * nx); + + if (nx != 0 && (ddo->ddo_xlimport == NULL || ddo->ddo_xlexport == NULL)) + return (-1); /* errno is set for us */ + + for (i = 0; i < nx; i++) { + ddo->ddo_xlimport[i] = DOF_SECIDX_NONE; + ddo->ddo_xlexport[i] = DOF_SECIDX_NONE; + } + + dt_buf_reset(dtp, &ddo->ddo_secs); + dt_buf_reset(dtp, &ddo->ddo_strs); + dt_buf_reset(dtp, &ddo->ddo_ldata); + dt_buf_reset(dtp, &ddo->ddo_udata); + + dt_buf_reset(dtp, &ddo->ddo_probes); + dt_buf_reset(dtp, &ddo->ddo_args); + dt_buf_reset(dtp, &ddo->ddo_offs); + dt_buf_reset(dtp, &ddo->ddo_enoffs); + dt_buf_reset(dtp, &ddo->ddo_rels); + + dt_buf_reset(dtp, &ddo->ddo_xlms); + return (0); +} + +/* + * Add a loadable DOF section to the file using the specified data buffer and + * the specified DOF section attributes. DOF_SECF_LOAD must be set in flags. + * If 'data' is NULL, the caller is responsible for manipulating the ldata buf. 
+ */ +static dof_secidx_t +dof_add_lsect(dt_dof_t *ddo, const void *data, uint32_t type, + uint32_t align, uint32_t flags, uint32_t entsize, uint64_t size) +{ + dtrace_hdl_t *dtp = ddo->ddo_hdl; + dof_sec_t s; + + s.dofs_type = type; + s.dofs_align = align; + s.dofs_flags = flags | DOF_SECF_LOAD; + s.dofs_entsize = entsize; + s.dofs_offset = dt_buf_offset(&ddo->ddo_ldata, align); + s.dofs_size = size; + + dt_buf_write(dtp, &ddo->ddo_secs, &s, sizeof (s), sizeof (uint64_t)); + + if (data != NULL) + dt_buf_write(dtp, &ddo->ddo_ldata, data, size, align); + + return (ddo->ddo_nsecs++); +} + +/* + * Add an unloadable DOF section to the file using the specified data buffer + * and DOF section attributes. DOF_SECF_LOAD must *not* be set in flags. + * If 'data' is NULL, the caller is responsible for manipulating the udata buf. + */ +static dof_secidx_t +dof_add_usect(dt_dof_t *ddo, const void *data, uint32_t type, + uint32_t align, uint32_t flags, uint32_t entsize, uint64_t size) +{ + dtrace_hdl_t *dtp = ddo->ddo_hdl; + dof_sec_t s; + + s.dofs_type = type; + s.dofs_align = align; + s.dofs_flags = flags & ~DOF_SECF_LOAD; + s.dofs_entsize = entsize; + s.dofs_offset = dt_buf_offset(&ddo->ddo_udata, align); + s.dofs_size = size; + + dt_buf_write(dtp, &ddo->ddo_secs, &s, sizeof (s), sizeof (uint64_t)); + + if (data != NULL) + dt_buf_write(dtp, &ddo->ddo_udata, data, size, align); + + return (ddo->ddo_nsecs++); +} + +/* + * Add a string to the global string table associated with the DOF. The offset + * of the string is returned as an index into the string table. 
+ */ +static dof_stridx_t +dof_add_string(dt_dof_t *ddo, const char *s) +{ + dt_buf_t *bp = &ddo->ddo_strs; + dof_stridx_t i = dt_buf_len(bp); + + if (i != 0 && (s == NULL || *s == '\0')) + return (0); /* string table has \0 at offset 0 */ + + dt_buf_write(ddo->ddo_hdl, bp, s, strlen(s) + 1, sizeof (char)); + return (i); +} + +static dof_attr_t +dof_attr(const dtrace_attribute_t *ap) +{ + return (DOF_ATTR(ap->dtat_name, ap->dtat_data, ap->dtat_class)); +} + +static dof_secidx_t +dof_add_difo(dt_dof_t *ddo, const dtrace_difo_t *dp) +{ + dof_secidx_t dsecs[5]; /* enough for all possible DIFO sections */ + uint_t nsecs = 0; + + dof_difohdr_t *dofd; + dof_relohdr_t dofr; + dof_secidx_t relsec; + + dof_secidx_t strsec = DOF_SECIDX_NONE; + dof_secidx_t intsec = DOF_SECIDX_NONE; + dof_secidx_t hdrsec = DOF_SECIDX_NONE; + + if (dp->dtdo_buf != NULL) { + dsecs[nsecs++] = dof_add_lsect(ddo, dp->dtdo_buf, + DOF_SECT_DIF, sizeof (dif_instr_t), 0, + sizeof (dif_instr_t), sizeof (dif_instr_t) * dp->dtdo_len); + } + + if (dp->dtdo_inttab != NULL) { + dsecs[nsecs++] = intsec = dof_add_lsect(ddo, dp->dtdo_inttab, + DOF_SECT_INTTAB, sizeof (uint64_t), 0, + sizeof (uint64_t), sizeof (uint64_t) * dp->dtdo_intlen); + } + + if (dp->dtdo_strtab != NULL) { + dsecs[nsecs++] = strsec = dof_add_lsect(ddo, dp->dtdo_strtab, + DOF_SECT_STRTAB, sizeof (char), 0, 0, dp->dtdo_strlen); + } + + if (dp->dtdo_vartab != NULL) { + dsecs[nsecs++] = dof_add_lsect(ddo, dp->dtdo_vartab, + DOF_SECT_VARTAB, sizeof (uint_t), 0, sizeof (dtrace_difv_t), + sizeof (dtrace_difv_t) * dp->dtdo_varlen); + } + + if (dp->dtdo_xlmtab != NULL) { + dof_xlref_t *xlt, *xlp; + dt_node_t **pnp; + + xlt = alloca(sizeof (dof_xlref_t) * dp->dtdo_xlmlen); + pnp = dp->dtdo_xlmtab; + + /* + * dtdo_xlmtab contains pointers to the translator members. + * The translator itself is in sect ddo_xlimport[dxp->dx_id]. 
+ * The XLMEMBERS entries are in order by their dn_membid, so + * the member section offset is the population count of bits + * in ddo_pgp->dp_xlrefs[] up to and not including dn_membid. + */ + for (xlp = xlt; xlp < xlt + dp->dtdo_xlmlen; xlp++) { + dt_node_t *dnp = *pnp++; + dt_xlator_t *dxp = dnp->dn_membexpr->dn_xlator; + + xlp->dofxr_xlator = ddo->ddo_xlimport[dxp->dx_id]; + xlp->dofxr_member = dt_popcb( + ddo->ddo_pgp->dp_xrefs[dxp->dx_id], dnp->dn_membid); + xlp->dofxr_argn = (uint32_t)dxp->dx_arg; + } + + dsecs[nsecs++] = dof_add_lsect(ddo, xlt, DOF_SECT_XLTAB, + sizeof (dof_secidx_t), 0, sizeof (dof_xlref_t), + sizeof (dof_xlref_t) * dp->dtdo_xlmlen); + } + + /* + * Copy the return type and the array of section indices that form the + * DIFO into a single dof_difohdr_t and then add DOF_SECT_DIFOHDR. + */ + assert(nsecs <= sizeof (dsecs) / sizeof (dsecs[0])); + dofd = alloca(sizeof (dtrace_diftype_t) + sizeof (dsecs)); + bcopy(&dp->dtdo_rtype, &dofd->dofd_rtype, sizeof (dtrace_diftype_t)); + bcopy(dsecs, &dofd->dofd_links, sizeof (dof_secidx_t) * nsecs); + + hdrsec = dof_add_lsect(ddo, dofd, DOF_SECT_DIFOHDR, + sizeof (dof_secidx_t), 0, 0, + sizeof (dtrace_diftype_t) + sizeof (dof_secidx_t) * nsecs); + + /* + * Add any other sections related to dtrace_difo_t. These are not + * referenced in dof_difohdr_t because they are not used by emulation. + */ + if (dp->dtdo_kreltab != NULL) { + relsec = dof_add_lsect(ddo, dp->dtdo_kreltab, DOF_SECT_RELTAB, + sizeof (uint64_t), 0, sizeof (dof_relodesc_t), + sizeof (dof_relodesc_t) * dp->dtdo_krelen); + + /* + * This code assumes the target of all relocations is the + * integer table 'intsec' (DOF_SECT_INTTAB). If other sections + * need relocation in the future this will need to change. 
+ */ + dofr.dofr_strtab = strsec; + dofr.dofr_relsec = relsec; + dofr.dofr_tgtsec = intsec; + + (void) dof_add_lsect(ddo, &dofr, DOF_SECT_KRELHDR, + sizeof (dof_secidx_t), 0, 0, sizeof (dof_relohdr_t)); + } + + if (dp->dtdo_ureltab != NULL) { + relsec = dof_add_lsect(ddo, dp->dtdo_ureltab, DOF_SECT_RELTAB, + sizeof (uint64_t), 0, sizeof (dof_relodesc_t), + sizeof (dof_relodesc_t) * dp->dtdo_urelen); + + /* + * This code assumes the target of all relocations is the + * integer table 'intsec' (DOF_SECT_INTTAB). If other sections + * need relocation in the future this will need to change. + */ + dofr.dofr_strtab = strsec; + dofr.dofr_relsec = relsec; + dofr.dofr_tgtsec = intsec; + + (void) dof_add_lsect(ddo, &dofr, DOF_SECT_URELHDR, + sizeof (dof_secidx_t), 0, 0, sizeof (dof_relohdr_t)); + } + + return (hdrsec); +} + +static void +dof_add_translator(dt_dof_t *ddo, const dt_xlator_t *dxp, uint_t type) +{ + dtrace_hdl_t *dtp = ddo->ddo_hdl; + dof_xlmember_t dofxm; + dof_xlator_t dofxl; + dof_secidx_t *xst; + + char buf[DT_TYPE_NAMELEN]; + dt_node_t *dnp; + uint_t i = 0; + + assert(type == DOF_SECT_XLIMPORT || type == DOF_SECT_XLEXPORT); + xst = type == DOF_SECT_XLIMPORT ? ddo->ddo_xlimport : ddo->ddo_xlexport; + + if (xst[dxp->dx_id] != DOF_SECIDX_NONE) + return; /* translator has already been emitted */ + + dt_buf_reset(dtp, &ddo->ddo_xlms); + + /* + * Generate an array of dof_xlmember_t's into ddo_xlms. If we are + * importing the translator, add only those members referenced by the + * program and set the dofxm_difo reference of each member to NONE. If + * we're exporting the translator, add all members and a DIFO for each. 
+ */ + for (dnp = dxp->dx_members; dnp != NULL; dnp = dnp->dn_list, i++) { + if (type == DOF_SECT_XLIMPORT) { + if (!BT_TEST(ddo->ddo_pgp->dp_xrefs[dxp->dx_id], i)) + continue; /* member is not referenced */ + dofxm.dofxm_difo = DOF_SECIDX_NONE; + } else { + dofxm.dofxm_difo = dof_add_difo(ddo, + dxp->dx_membdif[dnp->dn_membid]); + } + + dofxm.dofxm_name = dof_add_string(ddo, dnp->dn_membname); + dt_node_diftype(dtp, dnp, &dofxm.dofxm_type); + + dt_buf_write(dtp, &ddo->ddo_xlms, + &dofxm, sizeof (dofxm), sizeof (uint32_t)); + } + + dofxl.dofxl_members = dof_add_lsect(ddo, NULL, DOF_SECT_XLMEMBERS, + sizeof (uint32_t), 0, sizeof (dofxm), dt_buf_len(&ddo->ddo_xlms)); + + dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_xlms, sizeof (uint32_t)); + + dofxl.dofxl_strtab = ddo->ddo_strsec; + dofxl.dofxl_argv = dof_add_string(ddo, ctf_type_name( + dxp->dx_src_ctfp, dxp->dx_src_type, buf, sizeof (buf))); + dofxl.dofxl_argc = 1; + dofxl.dofxl_type = dof_add_string(ddo, ctf_type_name( + dxp->dx_dst_ctfp, dxp->dx_dst_type, buf, sizeof (buf))); + dofxl.dofxl_attr = dof_attr(&dxp->dx_souid.di_attr); + + xst[dxp->dx_id] = dof_add_lsect(ddo, &dofxl, type, + sizeof (uint32_t), 0, 0, sizeof (dofxl)); +} + +/*ARGSUSED*/ +static int +dof_add_probe(dt_idhash_t *dhp, dt_ident_t *idp, void *data) +{ + dt_dof_t *ddo = data; + dtrace_hdl_t *dtp = ddo->ddo_hdl; + dt_probe_t *prp = idp->di_data; + + dof_probe_t dofpr; + dof_relodesc_t dofr; + dt_probe_instance_t *pip; + dt_node_t *dnp; + + char buf[DT_TYPE_NAMELEN]; + uint_t i; + + dofpr.dofpr_addr = 0; + dofpr.dofpr_name = dof_add_string(ddo, prp->pr_name); + dofpr.dofpr_nargv = dt_buf_len(&ddo->ddo_strs); + + for (dnp = prp->pr_nargs; dnp != NULL; dnp = dnp->dn_list) { + (void) dof_add_string(ddo, ctf_type_name(dnp->dn_ctfp, + dnp->dn_type, buf, sizeof (buf))); + } + + dofpr.dofpr_xargv = dt_buf_len(&ddo->ddo_strs); + + for (dnp = prp->pr_xargs; dnp != NULL; dnp = dnp->dn_list) { + (void) dof_add_string(ddo, ctf_type_name(dnp->dn_ctfp, + 
dnp->dn_type, buf, sizeof (buf))); + } + + dofpr.dofpr_argidx = dt_buf_len(&ddo->ddo_args) / sizeof (uint8_t); + + for (i = 0; i < prp->pr_xargc; i++) { + dt_buf_write(dtp, &ddo->ddo_args, &prp->pr_mapping[i], + sizeof (uint8_t), sizeof (uint8_t)); + } + + dofpr.dofpr_nargc = prp->pr_nargc; + dofpr.dofpr_xargc = prp->pr_xargc; + dofpr.dofpr_pad1 = 0; + dofpr.dofpr_pad2 = 0; + + for (pip = prp->pr_inst; pip != NULL; pip = pip->pi_next) { + dt_dprintf("adding probe for %s:%s\n", pip->pi_fname, + prp->pr_name); + + dofpr.dofpr_func = dof_add_string(ddo, pip->pi_fname); + + /* + * There should be one probe offset or is-enabled probe offset + * or else this probe instance won't have been created. The + * kernel will reject DOF which has a probe with no offsets. + */ + assert(pip->pi_noffs + pip->pi_nenoffs > 0); + + dofpr.dofpr_offidx = + dt_buf_len(&ddo->ddo_offs) / sizeof (uint32_t); + dofpr.dofpr_noffs = pip->pi_noffs; + dt_buf_write(dtp, &ddo->ddo_offs, pip->pi_offs, + pip->pi_noffs * sizeof (uint32_t), sizeof (uint32_t)); + + dofpr.dofpr_enoffidx = + dt_buf_len(&ddo->ddo_enoffs) / sizeof (uint32_t); + dofpr.dofpr_nenoffs = pip->pi_nenoffs; + dt_buf_write(dtp, &ddo->ddo_enoffs, pip->pi_enoffs, + pip->pi_nenoffs * sizeof (uint32_t), sizeof (uint32_t)); + + /* + * If pi_rname isn't set, the relocation will be against the + * function name. If it is, the relocation will be against + * pi_rname. This will be used if the function is scoped + * locally so an alternate symbol is added for the purpose + * of this relocation. 
+ */ + if (pip->pi_rname == NULL) + dofr.dofr_name = dofpr.dofpr_func; + else + dofr.dofr_name = dof_add_string(ddo, pip->pi_rname); + dofr.dofr_type = DOF_RELO_SETX; + dofr.dofr_offset = dt_buf_len(&ddo->ddo_probes); + dofr.dofr_data = 0; + + dt_buf_write(dtp, &ddo->ddo_rels, &dofr, + sizeof (dofr), sizeof (uint64_t)); + + dt_buf_write(dtp, &ddo->ddo_probes, &dofpr, + sizeof (dofpr), sizeof (uint64_t)); + } + + return (0); +} + +static int +dof_add_provider(dt_dof_t *ddo, const dt_provider_t *pvp) +{ + dtrace_hdl_t *dtp = ddo->ddo_hdl; + dof_provider_t dofpv; + dof_relohdr_t dofr; + dof_secidx_t *dofs; + ulong_t xr, nxr; + size_t sz; + id_t i; + + if (pvp->pv_flags & DT_PROVIDER_IMPL) { + /* + * ignore providers that are exported by dtrace(7D) + */ + return (0); + } + + nxr = dt_popcb(pvp->pv_xrefs, pvp->pv_xrmax); + dofs = alloca(sizeof (dof_secidx_t) * (nxr + 1)); + xr = 1; /* reserve dofs[0] for the provider itself */ + + /* + * For each translator referenced by the provider (pv_xrefs), emit an + * exported translator section for it if one hasn't been created yet. 
+ */ + for (i = 0; i < pvp->pv_xrmax; i++) { + if (BT_TEST(pvp->pv_xrefs, i) && + dtp->dt_xlatemode == DT_XL_DYNAMIC) { + dof_add_translator(ddo, + dt_xlator_lookup_id(dtp, i), DOF_SECT_XLEXPORT); + dofs[xr++] = ddo->ddo_xlexport[i]; + } + } + + dt_buf_reset(dtp, &ddo->ddo_probes); + dt_buf_reset(dtp, &ddo->ddo_args); + dt_buf_reset(dtp, &ddo->ddo_offs); + dt_buf_reset(dtp, &ddo->ddo_enoffs); + dt_buf_reset(dtp, &ddo->ddo_rels); + + (void) dt_idhash_iter(pvp->pv_probes, dof_add_probe, ddo); + + if (dt_buf_len(&ddo->ddo_probes) == 0) + return (dt_set_errno(dtp, EDT_NOPROBES)); + + dofpv.dofpv_probes = dof_add_lsect(ddo, NULL, DOF_SECT_PROBES, + sizeof (uint64_t), 0, sizeof (dof_probe_t), + dt_buf_len(&ddo->ddo_probes)); + + dt_buf_concat(dtp, &ddo->ddo_ldata, + &ddo->ddo_probes, sizeof (uint64_t)); + + dofpv.dofpv_prargs = dof_add_lsect(ddo, NULL, DOF_SECT_PRARGS, + sizeof (uint8_t), 0, sizeof (uint8_t), dt_buf_len(&ddo->ddo_args)); + + dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_args, sizeof (uint8_t)); + + dofpv.dofpv_proffs = dof_add_lsect(ddo, NULL, DOF_SECT_PROFFS, + sizeof (uint_t), 0, sizeof (uint_t), dt_buf_len(&ddo->ddo_offs)); + + dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_offs, sizeof (uint_t)); + + if ((sz = dt_buf_len(&ddo->ddo_enoffs)) != 0) { + dofpv.dofpv_prenoffs = dof_add_lsect(ddo, NULL, + DOF_SECT_PRENOFFS, sizeof (uint_t), 0, sizeof (uint_t), sz); + } else { + dofpv.dofpv_prenoffs = DOF_SECT_NONE; + } + + dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_enoffs, sizeof (uint_t)); + + dofpv.dofpv_strtab = ddo->ddo_strsec; + dofpv.dofpv_name = dof_add_string(ddo, pvp->pv_desc.dtvd_name); + + dofpv.dofpv_provattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_provider); + dofpv.dofpv_modattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_mod); + dofpv.dofpv_funcattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_func); + dofpv.dofpv_nameattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_name); + dofpv.dofpv_argsattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_args); + + dofs[0] 
= dof_add_lsect(ddo, &dofpv, DOF_SECT_PROVIDER, + sizeof (dof_secidx_t), 0, 0, sizeof (dof_provider_t)); + + dofr.dofr_strtab = dofpv.dofpv_strtab; + dofr.dofr_tgtsec = dofpv.dofpv_probes; + dofr.dofr_relsec = dof_add_lsect(ddo, NULL, DOF_SECT_RELTAB, + sizeof (uint64_t), 0, sizeof (dof_relodesc_t), + dt_buf_len(&ddo->ddo_rels)); + + dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_rels, sizeof (uint64_t)); + + (void) dof_add_lsect(ddo, &dofr, DOF_SECT_URELHDR, + sizeof (dof_secidx_t), 0, 0, sizeof (dof_relohdr_t)); + + if (nxr != 0 && dtp->dt_xlatemode == DT_XL_DYNAMIC) { + (void) dof_add_lsect(ddo, dofs, DOF_SECT_PREXPORT, + sizeof (dof_secidx_t), 0, sizeof (dof_secidx_t), + sizeof (dof_secidx_t) * (nxr + 1)); + } + + return (0); +} + +static int +dof_hdr(dtrace_hdl_t *dtp, uint8_t dofversion, dof_hdr_t *hp) +{ + /* + * If our config values cannot fit in a uint8_t, we can't generate a + * DOF header since the values won't fit. This can only happen if the + * user forcibly compiles a program with an artificial configuration. 
+ */ + if (dtp->dt_conf.dtc_difversion > UINT8_MAX || + dtp->dt_conf.dtc_difintregs > UINT8_MAX || + dtp->dt_conf.dtc_diftupregs > UINT8_MAX) + return (dt_set_errno(dtp, EOVERFLOW)); + + bzero(hp, sizeof (dof_hdr_t)); + + hp->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; + hp->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; + hp->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; + hp->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; + + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) + hp->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_LP64; + else + hp->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_ILP32; + + hp->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; + hp->dofh_ident[DOF_ID_VERSION] = dofversion; + hp->dofh_ident[DOF_ID_DIFVERS] = dtp->dt_conf.dtc_difversion; + hp->dofh_ident[DOF_ID_DIFIREG] = dtp->dt_conf.dtc_difintregs; + hp->dofh_ident[DOF_ID_DIFTREG] = dtp->dt_conf.dtc_diftupregs; + + hp->dofh_hdrsize = sizeof (dof_hdr_t); + hp->dofh_secsize = sizeof (dof_sec_t); + hp->dofh_secoff = sizeof (dof_hdr_t); + + return (0); +} + +void * +dtrace_dof_create(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t flags) +{ + dt_dof_t *ddo = &dtp->dt_dof; + + const dtrace_ecbdesc_t *edp, *last; + const dtrace_probedesc_t *pdp; + const dtrace_actdesc_t *ap; + const dt_stmt_t *stp; + + uint_t maxacts = 0; + uint_t maxfmt = 0; + + dt_provider_t *pvp; + dt_xlator_t *dxp; + dof_actdesc_t *dofa; + dof_sec_t *sp; + size_t ssize, lsize; + dof_hdr_t h; + + dt_buf_t dof; + char *fmt; + uint_t i; + + if (flags & ~DTRACE_D_MASK) { + (void) dt_set_errno(dtp, EINVAL); + return (NULL); + } + + flags |= dtp->dt_dflags; + + if (dof_hdr(dtp, pgp->dp_dofversion, &h) != 0) + return (NULL); + + if (dt_dof_reset(dtp, pgp) != 0) + return (NULL); + + /* + * Iterate through the statement list computing the maximum number of + * actions and the maximum format string for allocating local buffers. 
+ */ + for (last = NULL, stp = dt_list_next(&pgp->dp_stmts); + stp != NULL; stp = dt_list_next(stp), last = edp) { + + dtrace_stmtdesc_t *sdp = stp->ds_desc; + dtrace_actdesc_t *ap = sdp->dtsd_action; + + if (sdp->dtsd_fmtdata != NULL) { + i = dtrace_printf_format(dtp, + sdp->dtsd_fmtdata, NULL, 0); + maxfmt = MAX(maxfmt, i); + } + + if ((edp = sdp->dtsd_ecbdesc) == last) + continue; /* same ecb as previous statement */ + + for (i = 0, ap = edp->dted_action; ap; ap = ap->dtad_next) + i++; + + maxacts = MAX(maxacts, i); + } + + dofa = alloca(sizeof (dof_actdesc_t) * maxacts); + fmt = alloca(maxfmt + 1); + + ddo->ddo_strsec = dof_add_lsect(ddo, NULL, DOF_SECT_STRTAB, 1, 0, 0, 0); + (void) dof_add_string(ddo, ""); + + /* + * If there are references to dynamic translators in the program, add + * an imported translator table entry for each referenced translator. + */ + if (pgp->dp_xrefslen != 0) { + for (dxp = dt_list_next(&dtp->dt_xlators); + dxp != NULL; dxp = dt_list_next(dxp)) { + if (dxp->dx_id < pgp->dp_xrefslen && + pgp->dp_xrefs[dxp->dx_id] != NULL) + dof_add_translator(ddo, dxp, DOF_SECT_XLIMPORT); + } + } + + /* + * Now iterate through the statement list, creating the DOF section + * headers and data for each one and adding them to our buffers. + */ + for (last = NULL, stp = dt_list_next(&pgp->dp_stmts); + stp != NULL; stp = dt_list_next(stp), last = edp) { + + dof_secidx_t probesec = DOF_SECIDX_NONE; + dof_secidx_t prdsec = DOF_SECIDX_NONE; + dof_secidx_t actsec = DOF_SECIDX_NONE; + + const dt_stmt_t *next = stp; + dtrace_stmtdesc_t *sdp = stp->ds_desc; + dof_stridx_t strndx = 0; + dof_probedesc_t dofp; + dof_ecbdesc_t dofe; + uint_t i; + + if ((edp = stp->ds_desc->dtsd_ecbdesc) == last) + continue; /* same ecb as previous statement */ + + pdp = &edp->dted_probe; + + /* + * Add a DOF_SECT_PROBEDESC for the ECB's probe description, + * and copy the probe description strings into the string table. 
+ */ + dofp.dofp_strtab = ddo->ddo_strsec; + dofp.dofp_provider = dof_add_string(ddo, pdp->dtpd_provider); + dofp.dofp_mod = dof_add_string(ddo, pdp->dtpd_mod); + dofp.dofp_func = dof_add_string(ddo, pdp->dtpd_func); + dofp.dofp_name = dof_add_string(ddo, pdp->dtpd_name); + dofp.dofp_id = pdp->dtpd_id; + + probesec = dof_add_lsect(ddo, &dofp, DOF_SECT_PROBEDESC, + sizeof (dof_secidx_t), 0, + sizeof (dof_probedesc_t), sizeof (dof_probedesc_t)); + + /* + * If there is a predicate DIFO associated with the ecbdesc, + * write out the DIFO sections and save the DIFO section index. + */ + if (edp->dted_pred.dtpdd_difo != NULL) + prdsec = dof_add_difo(ddo, edp->dted_pred.dtpdd_difo); + + /* + * Now iterate through the action list generating DIFOs as + * referenced therein and adding action descriptions to 'dofa'. + */ + for (i = 0, ap = edp->dted_action; + ap != NULL; ap = ap->dtad_next, i++) { + + if (ap->dtad_difo != NULL) { + dofa[i].dofa_difo = + dof_add_difo(ddo, ap->dtad_difo); + } else + dofa[i].dofa_difo = DOF_SECIDX_NONE; + + /* + * If the first action in a statement has string data, + * add the string to the global string table. This can + * be due either to a printf() format string + * (dtsd_fmtdata) or a print() type string + * (dtsd_strdata). 
+ */ + if (sdp != NULL && ap == sdp->dtsd_action) { + if (sdp->dtsd_fmtdata != NULL) { + (void) dtrace_printf_format(dtp, + sdp->dtsd_fmtdata, fmt, maxfmt + 1); + strndx = dof_add_string(ddo, fmt); + } else if (sdp->dtsd_strdata != NULL) { + strndx = dof_add_string(ddo, + sdp->dtsd_strdata); + } else { + strndx = 0; /* use dtad_arg instead */ + } + + if ((next = dt_list_next(next)) != NULL) + sdp = next->ds_desc; + else + sdp = NULL; + } + + if (strndx != 0) { + dofa[i].dofa_arg = strndx; + dofa[i].dofa_strtab = ddo->ddo_strsec; + } else { + dofa[i].dofa_arg = ap->dtad_arg; + dofa[i].dofa_strtab = DOF_SECIDX_NONE; + } + + dofa[i].dofa_kind = ap->dtad_kind; + dofa[i].dofa_ntuple = ap->dtad_ntuple; + dofa[i].dofa_uarg = ap->dtad_uarg; + } + + if (i > 0) { + actsec = dof_add_lsect(ddo, dofa, DOF_SECT_ACTDESC, + sizeof (uint64_t), 0, sizeof (dof_actdesc_t), + sizeof (dof_actdesc_t) * i); + } + + /* + * Now finally, add the DOF_SECT_ECBDESC referencing all the + * previously created sub-sections. + */ + dofe.dofe_probes = probesec; + dofe.dofe_pred = prdsec; + dofe.dofe_actions = actsec; + dofe.dofe_pad = 0; + dofe.dofe_uarg = edp->dted_uarg; + + (void) dof_add_lsect(ddo, &dofe, DOF_SECT_ECBDESC, + sizeof (uint64_t), 0, 0, sizeof (dof_ecbdesc_t)); + } + + /* + * If any providers are user-defined, output DOF sections corresponding + * to the providers and the probes and arguments that they define. + */ + if (flags & DTRACE_D_PROBES) { + for (pvp = dt_list_next(&dtp->dt_provlist); + pvp != NULL; pvp = dt_list_next(pvp)) { + if (dof_add_provider(ddo, pvp) != 0) + return (NULL); + } + } + + /* + * If we're not stripping unloadable sections, generate compiler + * comments and any other unloadable miscellany. 
+ */ + if (!(flags & DTRACE_D_STRIP)) { + (void) dof_add_usect(ddo, _dtrace_version, DOF_SECT_COMMENTS, + sizeof (char), 0, 0, strlen(_dtrace_version) + 1); + (void) dof_add_usect(ddo, &dtp->dt_uts, DOF_SECT_UTSNAME, + sizeof (char), 0, 0, sizeof (struct utsname)); + } + + /* + * Compute and fill in the appropriate values for the dof_hdr_t's + * dofh_secnum, dofh_loadsz, and dofh_filesz values. + */ + h.dofh_secnum = ddo->ddo_nsecs; + ssize = sizeof (h) + dt_buf_len(&ddo->ddo_secs); + + h.dofh_loadsz = ssize + + dt_buf_len(&ddo->ddo_ldata) + + dt_buf_len(&ddo->ddo_strs); + + if (dt_buf_len(&ddo->ddo_udata) != 0) { + lsize = roundup(h.dofh_loadsz, sizeof (uint64_t)); + h.dofh_filesz = lsize + dt_buf_len(&ddo->ddo_udata); + } else { + lsize = h.dofh_loadsz; + h.dofh_filesz = lsize; + } + + /* + * Set the global DOF_SECT_STRTAB's offset to be after the header, + * section headers, and other loadable data. Since we're going to + * iterate over the buffer data directly, we must check for errors. + */ + if ((i = dt_buf_error(&ddo->ddo_secs)) != 0) { + (void) dt_set_errno(dtp, i); + return (NULL); + } + + sp = dt_buf_ptr(&ddo->ddo_secs); + assert(sp[ddo->ddo_strsec].dofs_type == DOF_SECT_STRTAB); + assert(ssize == sizeof (h) + sizeof (dof_sec_t) * ddo->ddo_nsecs); + + sp[ddo->ddo_strsec].dofs_offset = ssize + dt_buf_len(&ddo->ddo_ldata); + sp[ddo->ddo_strsec].dofs_size = dt_buf_len(&ddo->ddo_strs); + + /* + * Now relocate all the other section headers by adding the appropriate + * delta to their respective dofs_offset values. + */ + for (i = 0; i < ddo->ddo_nsecs; i++, sp++) { + if (i == ddo->ddo_strsec) + continue; /* already relocated above */ + + if (sp->dofs_flags & DOF_SECF_LOAD) + sp->dofs_offset += ssize; + else + sp->dofs_offset += lsize; + } + + /* + * Finally, assemble the complete in-memory DOF buffer by writing the + * header and then concatenating all our buffers. dt_buf_concat() will + * propagate any errors and cause dt_buf_claim() to return NULL. 
+ */ + dt_buf_create(dtp, &dof, "dof", h.dofh_filesz); + + dt_buf_write(dtp, &dof, &h, sizeof (h), sizeof (uint64_t)); + dt_buf_concat(dtp, &dof, &ddo->ddo_secs, sizeof (uint64_t)); + dt_buf_concat(dtp, &dof, &ddo->ddo_ldata, sizeof (uint64_t)); + dt_buf_concat(dtp, &dof, &ddo->ddo_strs, sizeof (char)); + dt_buf_concat(dtp, &dof, &ddo->ddo_udata, sizeof (uint64_t)); + + return (dt_buf_claim(dtp, &dof)); +} + +void +dtrace_dof_destroy(dtrace_hdl_t *dtp, void *dof) +{ + dt_free(dtp, dof); +} + +void * +dtrace_getopt_dof(dtrace_hdl_t *dtp) +{ + dof_hdr_t *dof; + dof_sec_t *sec; + dof_optdesc_t *dofo; + int i, nopts = 0, len = sizeof (dof_hdr_t) + + roundup(sizeof (dof_sec_t), sizeof (uint64_t)); + + for (i = 0; i < DTRACEOPT_MAX; i++) { + if (dtp->dt_options[i] != DTRACEOPT_UNSET) + nopts++; + } + + len += sizeof (dof_optdesc_t) * nopts; + + if ((dof = dt_zalloc(dtp, len)) == NULL || + dof_hdr(dtp, DOF_VERSION, dof) != 0) { + dt_free(dtp, dof); + return (NULL); + } + + dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ + dof->dofh_loadsz = len; + dof->dofh_filesz = len; + + /* + * Fill in the option section header... 
+ */ + sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); + sec->dofs_type = DOF_SECT_OPTDESC; + sec->dofs_align = sizeof (uint64_t); + sec->dofs_flags = DOF_SECF_LOAD; + sec->dofs_entsize = sizeof (dof_optdesc_t); + + dofo = (dof_optdesc_t *)((uintptr_t)sec + + roundup(sizeof (dof_sec_t), sizeof (uint64_t))); + + sec->dofs_offset = (uintptr_t)dofo - (uintptr_t)dof; + sec->dofs_size = sizeof (dof_optdesc_t) * nopts; + + for (i = 0; i < DTRACEOPT_MAX; i++) { + if (dtp->dt_options[i] == DTRACEOPT_UNSET) + continue; + + dofo->dofo_option = i; + dofo->dofo_strtab = DOF_SECIDX_NONE; + dofo->dofo_value = dtp->dt_options[i]; + dofo++; + } + + return (dof); +} + +void * +dtrace_geterr_dof(dtrace_hdl_t *dtp) +{ + if (dtp->dt_errprog != NULL) + return (dtrace_dof_create(dtp, dtp->dt_errprog, 0)); + + (void) dt_set_errno(dtp, EDT_BADERROR); + return (NULL); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.h new file mode 100644 index 0000000..e0a4bf5 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.h @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DT_DOF_H +#define _DT_DOF_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <dtrace.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include <dt_buf.h> + +typedef struct dt_dof { + dtrace_hdl_t *ddo_hdl; /* libdtrace handle */ + dtrace_prog_t *ddo_pgp; /* current program */ + uint_t ddo_nsecs; /* number of sections */ + dof_secidx_t ddo_strsec; /* global strings section index */ + dof_secidx_t *ddo_xlimport; /* imported xlator section indices */ + dof_secidx_t *ddo_xlexport; /* exported xlator section indices */ + dt_buf_t ddo_secs; /* section headers */ + dt_buf_t ddo_strs; /* global strings */ + dt_buf_t ddo_ldata; /* loadable section data */ + dt_buf_t ddo_udata; /* unloadable section data */ + dt_buf_t ddo_probes; /* probe section data */ + dt_buf_t ddo_args; /* probe arguments section data */ + dt_buf_t ddo_offs; /* probe offsets section data */ + dt_buf_t ddo_enoffs; /* is-enabled offsets section data */ + dt_buf_t ddo_rels; /* probe relocation section data */ + dt_buf_t ddo_xlms; /* xlate members section data */ +} dt_dof_t; + +extern void dt_dof_init(dtrace_hdl_t *); +extern void dt_dof_fini(dtrace_hdl_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_DOF_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_error.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_error.c new file mode 100644 index 0000000..2327ff7 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_error.c @@ -0,0 +1,241 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#include <string.h> +#include <strings.h> +#include <dt_impl.h> + +static const struct { + int err; + const char *msg; +} _dt_errlist[] = { + { EDT_VERSION, "Client requested version newer than library" }, + { EDT_VERSINVAL, "Version is not properly formatted or is too large" }, + { EDT_VERSUNDEF, "Requested version is not supported by compiler" }, + { EDT_VERSREDUCED, "Requested version conflicts with earlier setting" }, + { EDT_CTF, "Unexpected libctf error" }, + { EDT_COMPILER, "Error in D program compilation" }, + { EDT_NOTUPREG, "Insufficient tuple registers to generate code" }, + { EDT_NOMEM, "Memory allocation failure" }, + { EDT_INT2BIG, "Integer constant table limit exceeded" }, + { EDT_STR2BIG, "String constant table limit exceeded" }, + { EDT_NOMOD, "Unknown module name" }, + { EDT_NOPROV, "Unknown provider name" }, + { EDT_NOPROBE, "No probe matches description" }, + { EDT_NOSYM, "Unknown symbol name" }, + { EDT_NOSYMADDR, "No symbol corresponds to address" }, + { EDT_NOTYPE, "Unknown type name" }, + { EDT_NOVAR, "Unknown variable name" }, + { 
EDT_NOAGG, "Unknown aggregation name" }, + { EDT_BADSCOPE, "Improper use of scoping operator in type name" }, + { EDT_BADSPEC, "Overspecified probe description" }, + { EDT_BADSPCV, "Undefined macro variable in probe description" }, + { EDT_BADID, "Unknown probe identifier" }, + { EDT_NOTLOADED, "Module is no longer loaded" }, + { EDT_NOCTF, "Module does not contain any CTF data" }, + { EDT_DATAMODEL, "Module and program data models do not match" }, + { EDT_DIFVERS, "Library uses newer DIF version than kernel" }, + { EDT_BADAGG, "Unknown aggregating action" }, + { EDT_FIO, "Error occurred while reading from input stream" }, + { EDT_DIFINVAL, "DIF program content is invalid" }, + { EDT_DIFSIZE, "DIF program exceeds maximum program size" }, + { EDT_DIFFAULT, "DIF program contains invalid pointer" }, + { EDT_BADPROBE, "Invalid probe specification" }, + { EDT_BADPGLOB, "Probe description has too many globbing characters" }, + { EDT_NOSCOPE, "Declaration scope stack underflow" }, + { EDT_NODECL, "Declaration stack underflow" }, + { EDT_DMISMATCH, "Data record list does not match statement" }, + { EDT_DOFFSET, "Data record offset exceeds buffer boundary" }, + { EDT_DALIGN, "Data record has inappropriate alignment" }, + { EDT_BADOPTNAME, "Invalid option name" }, + { EDT_BADOPTVAL, "Invalid value for specified option" }, + { EDT_BADOPTCTX, "Option cannot be used from within a D program" }, + { EDT_CPPFORK, "Failed to fork preprocessor" }, + { EDT_CPPEXEC, "Failed to exec preprocessor" }, + { EDT_CPPENT, "Preprocessor not found" }, + { EDT_CPPERR, "Preprocessor failed to process input program" }, + { EDT_SYMOFLOW, "Symbol table identifier space exhausted" }, + { EDT_ACTIVE, "Operation illegal when tracing is active" }, + { EDT_DESTRUCTIVE, "Destructive actions not allowed" }, + { EDT_NOANON, "No anonymous tracing state" }, + { EDT_ISANON, "Can't claim anonymous state and enable probes" }, + { EDT_ENDTOOBIG, "END enablings exceed size of principal buffer" }, + { EDT_NOCONV, 
"Failed to load type for printf conversion" }, + { EDT_BADCONV, "Incomplete printf conversion" }, + { EDT_BADERROR, "Invalid library ERROR action" }, + { EDT_ERRABORT, "Abort due to error" }, + { EDT_DROPABORT, "Abort due to drop" }, + { EDT_DIRABORT, "Abort explicitly directed" }, + { EDT_BADRVAL, "Invalid return value from callback" }, + { EDT_BADNORMAL, "Invalid normalization" }, + { EDT_BUFTOOSMALL, "Enabling exceeds size of buffer" }, + { EDT_BADTRUNC, "Invalid truncation" }, + { EDT_BUSY, "DTrace cannot be used when kernel debugger is active" }, + { EDT_ACCESS, "DTrace requires additional privileges" }, + { EDT_NOENT, "DTrace device not available on system" }, + { EDT_BRICKED, "Abort due to systemic unresponsiveness" }, + { EDT_HARDWIRE, "Failed to load language definitions" }, + { EDT_ELFVERSION, "libelf is out-of-date with respect to libdtrace" }, + { EDT_NOBUFFERED, "Attempt to buffer output without handler" }, + { EDT_UNSTABLE, "Description matched an unstable set of probes" }, + { EDT_BADSETOPT, "Invalid setopt() library action" }, + { EDT_BADSTACKPC, "Invalid stack program counter size" }, + { EDT_BADAGGVAR, "Invalid aggregation variable identifier" }, + { EDT_OVERSION, "Client requested deprecated version of library" }, + { EDT_ENABLING_ERR, "Failed to enable probe" }, + { EDT_NOPROBES, "No probe sites found for declared provider" }, + { EDT_CANTLOAD, "Failed to load module" }, +}; + +static const int _dt_nerr = sizeof (_dt_errlist) / sizeof (_dt_errlist[0]); + +const char * +dtrace_errmsg(dtrace_hdl_t *dtp, int error) +{ + const char *str; + int i; + + if (error == EDT_COMPILER && dtp != NULL && dtp->dt_errmsg[0] != '\0') + str = dtp->dt_errmsg; + else if (error == EDT_CTF && dtp != NULL && dtp->dt_ctferr != 0) + str = ctf_errmsg(dtp->dt_ctferr); + else if (error >= EDT_BASE && (error - EDT_BASE) < _dt_nerr) { + for (i = 0; i < _dt_nerr; i++) { + if (_dt_errlist[i].err == error) + return (_dt_errlist[i].msg); + } + str = NULL; + } else + str = 
strerror(error); + + return (str ? str : "Unknown error"); +} + +int +dtrace_errno(dtrace_hdl_t *dtp) +{ + return (dtp->dt_errno); +} + +#if defined(sun) +int +dt_set_errno(dtrace_hdl_t *dtp, int err) +{ + dtp->dt_errno = err; + return (-1); +} +#else +int +_dt_set_errno(dtrace_hdl_t *dtp, int err, const char *errfile, int errline) +{ + dtp->dt_errno = err; + dtp->dt_errfile = errfile; + dtp->dt_errline = errline; + return (-1); +} + +void dt_get_errloc(dtrace_hdl_t *dtp, const char **p_errfile, int *p_errline) +{ + *p_errfile = dtp->dt_errfile; + *p_errline = dtp->dt_errline; +} +#endif + +void +dt_set_errmsg(dtrace_hdl_t *dtp, const char *errtag, const char *region, + const char *filename, int lineno, const char *format, va_list ap) +{ + size_t len, n; + char *p, *s; + + s = dtp->dt_errmsg; + n = sizeof (dtp->dt_errmsg); + + if (errtag != NULL && (yypcb->pcb_cflags & DTRACE_C_ETAGS)) + (void) snprintf(s, n, "[%s] ", errtag); + else + s[0] = '\0'; + + len = strlen(dtp->dt_errmsg); + s = dtp->dt_errmsg + len; + n = sizeof (dtp->dt_errmsg) - len; + + if (filename == NULL) + filename = dtp->dt_filetag; + + if (filename != NULL) + (void) snprintf(s, n, "\"%s\", line %d: ", filename, lineno); + else if (lineno != 0) + (void) snprintf(s, n, "line %d: ", lineno); + else if (region != NULL) + (void) snprintf(s, n, "in %s: ", region); + + len = strlen(dtp->dt_errmsg); + s = dtp->dt_errmsg + len; + n = sizeof (dtp->dt_errmsg) - len; + (void) vsnprintf(s, n, format, ap); + + if ((p = strrchr(dtp->dt_errmsg, '\n')) != NULL) + *p = '\0'; /* remove trailing \n from message buffer */ + + dtp->dt_errtag = errtag; +} + +/*ARGSUSED*/ +const char * +dtrace_faultstr(dtrace_hdl_t *dtp, int fault) +{ + int i; + + static const struct { + int code; + const char *str; + } faults[] = { + { DTRACEFLT_BADADDR, "invalid address" }, + { DTRACEFLT_BADALIGN, "invalid alignment" }, + { DTRACEFLT_ILLOP, "illegal operation" }, + { DTRACEFLT_DIVZERO, "divide-by-zero" }, + { DTRACEFLT_NOSCRATCH, "out 
of scratch space" }, + { DTRACEFLT_KPRIV, "invalid kernel access" }, + { DTRACEFLT_UPRIV, "invalid user access" }, + { DTRACEFLT_TUPOFLOW, "tuple stack overflow" }, + { DTRACEFLT_BADSTACK, "bad stack" }, + { DTRACEFLT_LIBRARY, "library-level fault" }, + { 0, NULL } + }; + + for (i = 0; faults[i].str != NULL; i++) { + if (faults[i].code == fault) + return (faults[i].str); + } + + return ("unknown fault"); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_errtags.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_errtags.h new file mode 100644 index 0000000..6bc392f --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_errtags.h @@ -0,0 +1,278 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + /* + * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + +#ifndef _DT_ERRTAGS_H +#define _DT_ERRTAGS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This enum definition is used to define a set of error tags associated with + * the D compiler's various error conditions. The shell script mkerrtags.sh is + * used to parse this file and create a corresponding dt_errtags.c source file. + * If you do something other than add a new error tag here, you may need to + * update the mkerrtags shell script as it is based upon simple regexps. + */ +typedef enum { + D_UNKNOWN, /* unknown D compiler error */ + D_SYNTAX, /* syntax error in input stream */ + D_EMPTY, /* empty translation unit */ + D_TYPE_ERR, /* type definition missing */ + D_TYPE_MEMBER, /* type member not found */ + D_ASRELO, /* relocation remains against symbol */ + D_CG_EXPR, /* tracing function called from expr */ + D_CG_DYN, /* expression returns dynamic result */ + D_ATTR_MIN, /* attributes less than amin setting */ + D_ID_OFLOW, /* identifier space overflow */ + D_PDESC_ZERO, /* probedesc matches zero probes */ + D_PDESC_INVAL, /* probedesc is not valid */ + D_PRED_SCALAR, /* predicate must be of scalar type */ + D_FUNC_IDENT, /* function designator is not ident */ + D_FUNC_UNDEF, /* function ident is not defined */ + D_FUNC_IDKIND, /* function ident is of wrong idkind */ + D_OFFSETOF_TYPE, /* offsetof arg is not sou type */ + D_OFFSETOF_BITFIELD, /* offsetof applied to field member */ + D_SIZEOF_TYPE, /* invalid sizeof type */ + D_SIZEOF_BITFIELD, /* sizeof applied to field member */ + D_STRINGOF_TYPE, /* invalid stringof type */ + D_OP_IDENT, /* operand must be an identifier */ + D_OP_INT, /* operand must be integral type */ + D_OP_SCALAR, /* operand must be scalar type */ + D_OP_ARITH, /* operand must be arithmetic type */ + D_OP_WRITE, /* operand must be writable variable */ + D_OP_LVAL, /* operand must be lvalue */ + D_OP_INCOMPAT, /* operand types are not compatible */ + D_OP_VFPTR, /* operand cannot be void or func ptr */ + D_OP_ARRFUN, /* 
operand cannot be array or func */ + D_OP_PTR, /* operand must be a pointer */ + D_OP_SOU, /* operand must be struct or union */ + D_OP_INCOMPLETE, /* operand is an incomplete type */ + D_OP_DYN, /* operand cannot be of dynamic type */ + D_OP_ACT, /* operand cannot be action */ + D_AGG_REDEF, /* aggregation cannot be redefined */ + D_AGG_FUNC, /* aggregating function required */ + D_AGG_MDIM, /* aggregation used as multi-dim arr */ + D_ARR_BADREF, /* access non-array using tuple */ + D_ARR_LOCAL, /* cannot define local assc array */ + D_DIV_ZERO, /* division by zero detected */ + D_DEREF_NONPTR, /* dereference non-pointer type */ + D_DEREF_VOID, /* dereference void pointer */ + D_DEREF_FUNC, /* dereference function pointer */ + D_ADDROF_LVAL, /* unary & applied to non-lvalue */ + D_ADDROF_VAR, /* unary & applied to variable */ + D_ADDROF_BITFIELD, /* unary & applied to field member */ + D_XLATE_REDECL, /* translator redeclared */ + D_XLATE_NOCONV, /* no conversion for member defined */ + D_XLATE_NONE, /* no translator for type combo */ + D_XLATE_SOU, /* dst must be struct or union type */ + D_XLATE_INCOMPAT, /* translator member type incompat */ + D_XLATE_MEMB, /* translator member is not valid */ + D_CAST_INVAL, /* invalid cast expression */ + D_PRAGERR, /* #pragma error message */ + D_PRAGCTL_INVAL, /* invalid control directive */ + D_PRAGMA_INVAL, /* invalid compiler pragma */ + D_PRAGMA_UNUSED, /* unused compiler pragma */ + D_PRAGMA_MALFORM, /* malformed #pragma argument list */ + D_PRAGMA_OPTSET, /* failed to set #pragma option */ + D_PRAGMA_SCOPE, /* #pragma identifier scope error */ + D_PRAGMA_DEPEND, /* #pragma dependency not satisfied */ + D_MACRO_UNDEF, /* macro parameter is not defined */ + D_MACRO_OFLOW, /* macro parameter integer overflow */ + D_MACRO_UNUSED, /* macro parameter is never used */ + D_INT_OFLOW, /* integer constant overflow */ + D_INT_DIGIT, /* integer digit is not valid */ + D_STR_NL, /* newline in string literal */ + D_CHR_NL, /* 
newline in character constant */ + D_CHR_NULL, /* empty character constant */ + D_CHR_OFLOW, /* character constant is too long */ + D_IDENT_BADREF, /* identifier expected type mismatch */ + D_IDENT_UNDEF, /* identifier is not known/defined */ + D_IDENT_AMBIG, /* identifier is ambiguous (var/enum) */ + D_SYM_BADREF, /* kernel/user symbol ref mismatch */ + D_SYM_NOTYPES, /* no CTF data available for sym ref */ + D_SYM_MODEL, /* module/program data model mismatch */ + D_VAR_UNDEF, /* reference to undefined variable */ + D_VAR_UNSUP, /* unsupported variable specification */ + D_PROTO_LEN, /* prototype length mismatch */ + D_PROTO_ARG, /* prototype argument mismatch */ + D_ARGS_MULTI, /* description matches unstable set */ + D_ARGS_XLATOR, /* no args[] translator defined */ + D_ARGS_NONE, /* no args[] available */ + D_ARGS_TYPE, /* invalid args[] type */ + D_ARGS_IDX, /* invalid args[] index */ + D_REGS_IDX, /* invalid regs[] index */ + D_KEY_TYPE, /* invalid agg or array key type */ + D_PRINTF_DYN_PROTO, /* dynamic size argument missing */ + D_PRINTF_DYN_TYPE, /* dynamic size type mismatch */ + D_PRINTF_AGG_CONV, /* improper use of %@ conversion */ + D_PRINTF_ARG_PROTO, /* conversion missing value argument */ + D_PRINTF_ARG_TYPE, /* conversion arg has wrong type */ + D_PRINTF_ARG_EXTRA, /* extra arguments specified */ + D_PRINTF_ARG_FMT, /* format string is not a constant */ + D_PRINTF_FMT_EMPTY, /* format string is empty */ + D_DECL_CHARATTR, /* bad attributes for char decl */ + D_DECL_VOIDATTR, /* bad attributes for void decl */ + D_DECL_SIGNINT, /* sign/unsign with non-integer decl */ + D_DECL_LONGINT, /* long with non-arithmetic decl */ + D_DECL_IDENT, /* old-style declaration or bad type */ + D_DECL_CLASS, /* more than one storage class given */ + D_DECL_BADCLASS, /* decl class not supported in D */ + D_DECL_PARMCLASS, /* invalid class for parameter type */ + D_DECL_COMBO, /* bad decl specifier combination */ + D_DECL_ARRSUB, /* const int required for array size 
*/ + D_DECL_ARRNULL, /* array decl requires dim or tuple */ + D_DECL_ARRBIG, /* array size too big */ + D_DECL_IDRED, /* decl identifier redeclared */ + D_DECL_TYPERED, /* decl type redeclared */ + D_DECL_MNAME, /* member name missing */ + D_DECL_SCOPE, /* scoping operator used in decl */ + D_DECL_BFCONST, /* bit-field requires const size expr */ + D_DECL_BFSIZE, /* bit-field size too big for type */ + D_DECL_BFTYPE, /* bit-field type is not valid */ + D_DECL_ENCONST, /* enum tag requires const size expr */ + D_DECL_ENOFLOW, /* enumerator value overflows INT_MAX */ + D_DECL_USELESS, /* useless external declaration */ + D_DECL_LOCASSC, /* attempt to decl local assc array */ + D_DECL_VOIDOBJ, /* attempt to decl void object */ + D_DECL_DYNOBJ, /* attempt to decl dynamic object */ + D_DECL_INCOMPLETE, /* declaration uses incomplete type */ + D_DECL_PROTO_VARARGS, /* varargs not allowed in prototype */ + D_DECL_PROTO_TYPE, /* type not allowed in prototype */ + D_DECL_PROTO_VOID, /* void must be sole parameter */ + D_DECL_PROTO_NAME, /* void parameter may not have a name */ + D_DECL_PROTO_FORM, /* parameter name has no formal */ + D_COMM_COMM, /* commit() after commit() */ + D_COMM_DREC, /* commit() after data action */ + D_SPEC_SPEC, /* speculate() after speculate() */ + D_SPEC_COMM, /* speculate() after commit() */ + D_SPEC_DREC, /* speculate() after data action */ + D_AGG_COMM, /* aggregating act after commit() */ + D_AGG_SPEC, /* aggregating act after speculate() */ + D_AGG_NULL, /* aggregation stmt has null effect */ + D_AGG_SCALAR, /* aggregating function needs scalar */ + D_ACT_SPEC, /* destructive action after speculate */ + D_EXIT_SPEC, /* exit() action after speculate */ + D_DREC_COMM, /* data action after commit() */ + D_PRINTA_PROTO, /* printa() prototype mismatch */ + D_PRINTA_AGGARG, /* aggregation arg type mismatch */ + D_PRINTA_AGGBAD, /* printa() aggregation not defined */ + D_PRINTA_AGGKEY, /* printa() aggregation key mismatch */ + D_PRINTA_AGGPROTO, /* 
printa() aggregation mismatch */ + D_TRACE_VOID, /* trace() argument has void type */ + D_TRACE_DYN, /* trace() argument has dynamic type */ + D_TRACE_AGG, /* trace() argument is an aggregation */ + D_PRINT_VOID, /* print() argument has void type */ + D_PRINT_DYN, /* print() argument has dynamic type */ + D_PRINT_AGG, /* print() argument is an aggregation */ + D_TRACEMEM_ADDR, /* tracemem() address bad type */ + D_TRACEMEM_SIZE, /* tracemem() size bad type */ + D_TRACEMEM_ARGS, /* tracemem() illegal number of args */ + D_TRACEMEM_DYNSIZE, /* tracemem() dynamic size bad type */ + D_STACK_PROTO, /* stack() prototype mismatch */ + D_STACK_SIZE, /* stack() size argument bad type */ + D_USTACK_FRAMES, /* ustack() frames arg bad type */ + D_USTACK_STRSIZE, /* ustack() strsize arg bad type */ + D_USTACK_PROTO, /* ustack() prototype mismatch */ + D_LQUANT_BASETYPE, /* lquantize() bad base type */ + D_LQUANT_BASEVAL, /* lquantize() bad base value */ + D_LQUANT_LIMTYPE, /* lquantize() bad limit type */ + D_LQUANT_LIMVAL, /* lquantize() bad limit value */ + D_LQUANT_MISMATCH, /* lquantize() limit < base */ + D_LQUANT_STEPTYPE, /* lquantize() bad step type */ + D_LQUANT_STEPVAL, /* lquantize() bad step value */ + D_LQUANT_STEPLARGE, /* lquantize() step too large */ + D_LQUANT_STEPSMALL, /* lquantize() step too small */ + D_QUANT_PROTO, /* quantize() prototype mismatch */ + D_PROC_OFF, /* byte offset exceeds function size */ + D_PROC_ALIGN, /* byte offset has invalid alignment */ + D_PROC_NAME, /* invalid process probe name */ + D_PROC_GRAB, /* failed to grab process */ + D_PROC_DYN, /* process is not dynamically linked */ + D_PROC_LIB, /* invalid process library name */ + D_PROC_FUNC, /* no such function in process */ + D_PROC_CREATEFAIL, /* pid probe creation failed */ + D_PROC_NODEV, /* fasttrap device is not installed */ + D_PROC_BADPID, /* user probe pid invalid */ + D_PROC_BADPROV, /* user probe provider invalid */ + D_PROC_USDT, /* problem initializing usdt */ + 
D_CLEAR_PROTO, /* clear() prototype mismatch */ + D_CLEAR_AGGARG, /* aggregation arg type mismatch */ + D_CLEAR_AGGBAD, /* clear() aggregation not defined */ + D_NORMALIZE_PROTO, /* normalize() prototype mismatch */ + D_NORMALIZE_SCALAR, /* normalize() value must be scalar */ + D_NORMALIZE_AGGARG, /* aggregation arg type mismatch */ + D_NORMALIZE_AGGBAD, /* normalize() aggregation not def. */ + D_TRUNC_PROTO, /* trunc() prototype mismatch */ + D_TRUNC_SCALAR, /* trunc() value must be scalar */ + D_TRUNC_AGGARG, /* aggregation arg type mismatch */ + D_TRUNC_AGGBAD, /* trunc() aggregation not def. */ + D_PROV_BADNAME, /* invalid provider name */ + D_PROV_INCOMPAT, /* provider/probe interface mismatch */ + D_PROV_PRDUP, /* duplicate probe declaration */ + D_PROV_PRARGLEN, /* probe argument list too long */ + D_PROV_PRXLATOR, /* probe argument translator missing */ + D_FREOPEN_INVALID, /* freopen() filename is invalid */ + D_LQUANT_MATCHBASE, /* lquantize() mismatch on base */ + D_LQUANT_MATCHLIM, /* lquantize() mismatch on limit */ + D_LQUANT_MATCHSTEP, /* lquantize() mismatch on step */ + D_LLQUANT_FACTORTYPE, /* llquantize() bad magnitude type */ + D_LLQUANT_FACTORVAL, /* llquantize() bad magnitude value */ + D_LLQUANT_FACTORMATCH, /* llquantize() mismatch on magnitude */ + D_LLQUANT_LOWTYPE, /* llquantize() bad low mag type */ + D_LLQUANT_LOWVAL, /* llquantize() bad low mag value */ + D_LLQUANT_LOWMATCH, /* llquantize() mismatch on low mag */ + D_LLQUANT_HIGHTYPE, /* llquantize() bad high mag type */ + D_LLQUANT_HIGHVAL, /* llquantize() bad high mag value */ + D_LLQUANT_HIGHMATCH, /* llquantize() mismatch on high mag */ + D_LLQUANT_NSTEPTYPE, /* llquantize() bad # steps type */ + D_LLQUANT_NSTEPVAL, /* llquantize() bad # steps value */ + D_LLQUANT_NSTEPMATCH, /* llquantize() mismatch on # steps */ + D_LLQUANT_MAGRANGE, /* llquantize() bad magnitude range */ + D_LLQUANT_FACTORNSTEPS, /* llquantize() # steps < factor */ + D_LLQUANT_FACTOREVEN, /* llquantize() bad #
steps/factor */ + D_LLQUANT_FACTORSMALL, /* llquantize() magnitude too small */ + D_LLQUANT_MAGTOOBIG, /* llquantize() high mag too large */ + D_NOREG, /* no available internal registers */ + D_PRINTM_ADDR, /* printm() memref bad type */ + D_PRINTM_SIZE, /* printm() size bad type */ + D_PRINTT_ADDR, /* printt() typeref bad type */ + D_PRINTT_SIZE /* printt() size bad type */ +} dt_errtag_t; + +extern const char *dt_errtag(dt_errtag_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_ERRTAGS_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_grammar.y b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_grammar.y new file mode 100644 index 0000000..6321b65 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_grammar.y @@ -0,0 +1,861 @@ +%{ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. 
+ */ + +#include <dt_impl.h> + +#define OP1(op, c) dt_node_op1(op, c) +#define OP2(op, l, r) dt_node_op2(op, l, r) +#define OP3(x, y, z) dt_node_op3(x, y, z) +#define LINK(l, r) dt_node_link(l, r) +#define DUP(s) strdup(s) + +%} + +%union { + dt_node_t *l_node; + dt_decl_t *l_decl; + char *l_str; + uintmax_t l_int; + int l_tok; +} + +%token DT_TOK_COMMA DT_TOK_ELLIPSIS +%token DT_TOK_ASGN DT_TOK_ADD_EQ DT_TOK_SUB_EQ DT_TOK_MUL_EQ +%token DT_TOK_DIV_EQ DT_TOK_MOD_EQ DT_TOK_AND_EQ DT_TOK_XOR_EQ DT_TOK_OR_EQ +%token DT_TOK_LSH_EQ DT_TOK_RSH_EQ DT_TOK_QUESTION DT_TOK_COLON +%token DT_TOK_LOR DT_TOK_LXOR DT_TOK_LAND +%token DT_TOK_BOR DT_TOK_XOR DT_TOK_BAND DT_TOK_EQU DT_TOK_NEQ +%token DT_TOK_LT DT_TOK_LE DT_TOK_GT DT_TOK_GE DT_TOK_LSH DT_TOK_RSH +%token DT_TOK_ADD DT_TOK_SUB DT_TOK_MUL DT_TOK_DIV DT_TOK_MOD +%token DT_TOK_LNEG DT_TOK_BNEG DT_TOK_ADDADD DT_TOK_SUBSUB +%token DT_TOK_PREINC DT_TOK_POSTINC DT_TOK_PREDEC DT_TOK_POSTDEC +%token DT_TOK_IPOS DT_TOK_INEG DT_TOK_DEREF DT_TOK_ADDROF +%token DT_TOK_OFFSETOF DT_TOK_SIZEOF DT_TOK_STRINGOF DT_TOK_XLATE +%token DT_TOK_LPAR DT_TOK_RPAR DT_TOK_LBRAC DT_TOK_RBRAC DT_TOK_PTR DT_TOK_DOT + +%token <l_str> DT_TOK_STRING +%token <l_str> DT_TOK_IDENT +%token <l_str> DT_TOK_PSPEC +%token <l_str> DT_TOK_AGG +%token <l_str> DT_TOK_TNAME +%token <l_int> DT_TOK_INT + +%token DT_KEY_AUTO +%token DT_KEY_BREAK +%token DT_KEY_CASE +%token DT_KEY_CHAR +%token DT_KEY_CONST +%token DT_KEY_CONTINUE +%token DT_KEY_COUNTER +%token DT_KEY_DEFAULT +%token DT_KEY_DO +%token DT_KEY_DOUBLE +%token DT_KEY_ELSE +%token DT_KEY_ENUM +%token DT_KEY_EXTERN +%token DT_KEY_FLOAT +%token DT_KEY_FOR +%token DT_KEY_GOTO +%token DT_KEY_IF +%token DT_KEY_IMPORT +%token DT_KEY_INLINE +%token DT_KEY_INT +%token DT_KEY_LONG +%token DT_KEY_PROBE +%token DT_KEY_PROVIDER +%token DT_KEY_REGISTER +%token DT_KEY_RESTRICT +%token DT_KEY_RETURN +%token DT_KEY_SELF +%token DT_KEY_SHORT +%token DT_KEY_SIGNED +%token DT_KEY_STATIC +%token DT_KEY_STRING +%token 
DT_KEY_STRUCT +%token DT_KEY_SWITCH +%token DT_KEY_THIS +%token DT_KEY_TYPEDEF +%token DT_KEY_UNION +%token DT_KEY_UNSIGNED +%token DT_KEY_USERLAND +%token DT_KEY_VOID +%token DT_KEY_VOLATILE +%token DT_KEY_WHILE +%token DT_KEY_XLATOR + +%token DT_TOK_EPRED +%token DT_CTX_DEXPR +%token DT_CTX_DPROG +%token DT_CTX_DTYPE +%token DT_TOK_EOF 0 + +%left DT_TOK_COMMA +%right DT_TOK_ASGN DT_TOK_ADD_EQ DT_TOK_SUB_EQ DT_TOK_MUL_EQ DT_TOK_DIV_EQ + DT_TOK_MOD_EQ DT_TOK_AND_EQ DT_TOK_XOR_EQ DT_TOK_OR_EQ DT_TOK_LSH_EQ + DT_TOK_RSH_EQ +%left DT_TOK_QUESTION DT_TOK_COLON +%left DT_TOK_LOR +%left DT_TOK_LXOR +%left DT_TOK_LAND +%left DT_TOK_BOR +%left DT_TOK_XOR +%left DT_TOK_BAND +%left DT_TOK_EQU DT_TOK_NEQ +%left DT_TOK_LT DT_TOK_LE DT_TOK_GT DT_TOK_GE +%left DT_TOK_LSH DT_TOK_RSH +%left DT_TOK_ADD DT_TOK_SUB +%left DT_TOK_MUL DT_TOK_DIV DT_TOK_MOD +%right DT_TOK_LNEG DT_TOK_BNEG DT_TOK_ADDADD DT_TOK_SUBSUB + DT_TOK_IPOS DT_TOK_INEG +%right DT_TOK_DEREF DT_TOK_ADDROF DT_TOK_SIZEOF DT_TOK_STRINGOF DT_TOK_XLATE +%left DT_TOK_LPAR DT_TOK_RPAR DT_TOK_LBRAC DT_TOK_RBRAC DT_TOK_PTR DT_TOK_DOT + +%type <l_node> d_expression +%type <l_node> d_program +%type <l_node> d_type + +%type <l_node> translation_unit +%type <l_node> external_declaration +%type <l_node> inline_definition +%type <l_node> translator_definition +%type <l_node> translator_member_list +%type <l_node> translator_member +%type <l_node> provider_definition +%type <l_node> provider_probe_list +%type <l_node> provider_probe +%type <l_node> probe_definition +%type <l_node> probe_specifiers +%type <l_node> probe_specifier_list +%type <l_node> probe_specifier +%type <l_node> statement_list +%type <l_node> statement +%type <l_node> declaration +%type <l_node> init_declarator_list +%type <l_node> init_declarator + +%type <l_decl> type_specifier +%type <l_decl> type_qualifier +%type <l_decl> struct_or_union_specifier +%type <l_decl> specifier_qualifier_list +%type <l_decl> enum_specifier +%type <l_decl> declarator +%type 
<l_decl> direct_declarator +%type <l_decl> pointer +%type <l_decl> type_qualifier_list +%type <l_decl> type_name +%type <l_decl> abstract_declarator +%type <l_decl> direct_abstract_declarator + +%type <l_node> parameter_type_list +%type <l_node> parameter_list +%type <l_node> parameter_declaration + +%type <l_node> array +%type <l_node> array_parameters +%type <l_node> function +%type <l_node> function_parameters + +%type <l_node> expression +%type <l_node> assignment_expression +%type <l_node> conditional_expression +%type <l_node> constant_expression +%type <l_node> logical_or_expression +%type <l_node> logical_xor_expression +%type <l_node> logical_and_expression +%type <l_node> inclusive_or_expression +%type <l_node> exclusive_or_expression +%type <l_node> and_expression +%type <l_node> equality_expression +%type <l_node> relational_expression +%type <l_node> shift_expression +%type <l_node> additive_expression +%type <l_node> multiplicative_expression +%type <l_node> cast_expression +%type <l_node> unary_expression +%type <l_node> postfix_expression +%type <l_node> primary_expression +%type <l_node> argument_expression_list + +%type <l_tok> assignment_operator +%type <l_tok> unary_operator +%type <l_tok> struct_or_union + +%type <l_str> dtrace_keyword_ident + +%% + +dtrace_program: d_expression DT_TOK_EOF { return (dt_node_root($1)); } + | d_program DT_TOK_EOF { return (dt_node_root($1)); } + | d_type DT_TOK_EOF { return (dt_node_root($1)); } + ; + +d_expression: DT_CTX_DEXPR { $$ = NULL; } + | DT_CTX_DEXPR expression { $$ = $2; } + ; + +d_program: DT_CTX_DPROG { $$ = dt_node_program(NULL); } + | DT_CTX_DPROG translation_unit { $$ = dt_node_program($2); } + ; + +d_type: DT_CTX_DTYPE { $$ = NULL; } + | DT_CTX_DTYPE type_name { $$ = (dt_node_t *)$2; } + ; + +translation_unit: + external_declaration + | translation_unit external_declaration { $$ = LINK($1, $2); } + ; + +external_declaration: + inline_definition + | translator_definition + | provider_definition + 
| probe_definition + | declaration + ; + +inline_definition: + DT_KEY_INLINE declaration_specifiers declarator + { dt_scope_push(NULL, CTF_ERR); } DT_TOK_ASGN + assignment_expression ';' { + /* + * We push a new declaration scope before shifting the + * assignment_expression in order to preserve ds_class + * and ds_ident for use in dt_node_inline(). Once the + * entire inline_definition rule is matched, pop the + * scope and construct the inline using the saved decl. + */ + dt_scope_pop(); + $$ = dt_node_inline($6); + } + ; + +translator_definition: + DT_KEY_XLATOR type_name DT_TOK_LT type_name + DT_TOK_IDENT DT_TOK_GT '{' translator_member_list '}' ';' { + $$ = dt_node_xlator($2, $4, $5, $8); + } + | DT_KEY_XLATOR type_name DT_TOK_LT type_name + DT_TOK_IDENT DT_TOK_GT '{' '}' ';' { + $$ = dt_node_xlator($2, $4, $5, NULL); + } + ; + +translator_member_list: + translator_member + | translator_member_list translator_member { $$ = LINK($1,$2); } + ; + +translator_member: + DT_TOK_IDENT DT_TOK_ASGN assignment_expression ';' { + $$ = dt_node_member(NULL, $1, $3); + } + ; + +provider_definition: + DT_KEY_PROVIDER DT_TOK_IDENT '{' provider_probe_list '}' ';' { + $$ = dt_node_provider($2, $4); + } + | DT_KEY_PROVIDER DT_TOK_IDENT '{' '}' ';' { + $$ = dt_node_provider($2, NULL); + } + ; + +provider_probe_list: + provider_probe + | provider_probe_list provider_probe { $$ = LINK($1, $2); } + ; + +provider_probe: + DT_KEY_PROBE DT_TOK_IDENT function DT_TOK_COLON function ';' { + $$ = dt_node_probe($2, 2, $3, $5); + } + | DT_KEY_PROBE DT_TOK_IDENT function ';' { + $$ = dt_node_probe($2, 1, $3, NULL); + } + ; + + +probe_definition: + probe_specifiers { + /* + * If the input stream is a file, do not permit a probe + * specification without / <pred> / or { <act> } after + * it. This can only occur if the next token is EOF or + * an ambiguous predicate was slurped up as a comment. 
+ * We cannot perform this check if input() is a string + * because dtrace(1M) [-fmnP] also use the compiler and + * things like dtrace -n BEGIN have to be accepted. + */ + if (yypcb->pcb_fileptr != NULL) { + dnerror($1, D_SYNTAX, "expected predicate and/" + "or actions following probe description\n"); + } + $$ = dt_node_clause($1, NULL, NULL); + } + | probe_specifiers '{' statement_list '}' { + $$ = dt_node_clause($1, NULL, $3); + } + | probe_specifiers DT_TOK_DIV expression DT_TOK_EPRED { + dnerror($3, D_SYNTAX, "expected actions { } following " + "probe description and predicate\n"); + } + | probe_specifiers DT_TOK_DIV expression DT_TOK_EPRED + '{' statement_list '}' { + $$ = dt_node_clause($1, $3, $6); + } + ; + +probe_specifiers: + probe_specifier_list { yybegin(YYS_EXPR); $$ = $1; } + ; + +probe_specifier_list: + probe_specifier + | probe_specifier_list DT_TOK_COMMA probe_specifier { + $$ = LINK($1, $3); + } + ; + +probe_specifier: + DT_TOK_PSPEC { $$ = dt_node_pdesc_by_name($1); } + | DT_TOK_INT { $$ = dt_node_pdesc_by_id($1); } + ; + +statement_list: statement { $$ = $1; } + | statement_list ';' statement { $$ = LINK($1, $3); } + ; + +statement: /* empty */ { $$ = NULL; } + | expression { $$ = dt_node_statement($1); } + ; + +argument_expression_list: + assignment_expression + | argument_expression_list DT_TOK_COMMA assignment_expression { + $$ = LINK($1, $3); + } + ; + +primary_expression: + DT_TOK_IDENT { $$ = dt_node_ident($1); } + | DT_TOK_AGG { $$ = dt_node_ident($1); } + | DT_TOK_INT { $$ = dt_node_int($1); } + | DT_TOK_STRING { $$ = dt_node_string($1); } + | DT_KEY_SELF { $$ = dt_node_ident(DUP("self")); } + | DT_KEY_THIS { $$ = dt_node_ident(DUP("this")); } + | DT_TOK_LPAR expression DT_TOK_RPAR { $$ = $2; } + ; + +postfix_expression: + primary_expression + | postfix_expression + DT_TOK_LBRAC argument_expression_list DT_TOK_RBRAC { + $$ = OP2(DT_TOK_LBRAC, $1, $3); + } + | postfix_expression DT_TOK_LPAR DT_TOK_RPAR { + $$ = dt_node_func($1, NULL); + 
} + | postfix_expression + DT_TOK_LPAR argument_expression_list DT_TOK_RPAR { + $$ = dt_node_func($1, $3); + } + | postfix_expression DT_TOK_DOT DT_TOK_IDENT { + $$ = OP2(DT_TOK_DOT, $1, dt_node_ident($3)); + } + | postfix_expression DT_TOK_DOT DT_TOK_TNAME { + $$ = OP2(DT_TOK_DOT, $1, dt_node_ident($3)); + } + | postfix_expression DT_TOK_DOT dtrace_keyword_ident { + $$ = OP2(DT_TOK_DOT, $1, dt_node_ident($3)); + } + | postfix_expression DT_TOK_PTR DT_TOK_IDENT { + $$ = OP2(DT_TOK_PTR, $1, dt_node_ident($3)); + } + | postfix_expression DT_TOK_PTR DT_TOK_TNAME { + $$ = OP2(DT_TOK_PTR, $1, dt_node_ident($3)); + } + | postfix_expression DT_TOK_PTR dtrace_keyword_ident { + $$ = OP2(DT_TOK_PTR, $1, dt_node_ident($3)); + } + | postfix_expression DT_TOK_ADDADD { + $$ = OP1(DT_TOK_POSTINC, $1); + } + | postfix_expression DT_TOK_SUBSUB { + $$ = OP1(DT_TOK_POSTDEC, $1); + } + | DT_TOK_OFFSETOF DT_TOK_LPAR type_name DT_TOK_COMMA + DT_TOK_IDENT DT_TOK_RPAR { + $$ = dt_node_offsetof($3, $5); + } + | DT_TOK_OFFSETOF DT_TOK_LPAR type_name DT_TOK_COMMA + DT_TOK_TNAME DT_TOK_RPAR { + $$ = dt_node_offsetof($3, $5); + } + | DT_TOK_OFFSETOF DT_TOK_LPAR type_name DT_TOK_COMMA + dtrace_keyword_ident DT_TOK_RPAR { + $$ = dt_node_offsetof($3, $5); + } + | DT_TOK_XLATE DT_TOK_LT type_name DT_TOK_GT + DT_TOK_LPAR expression DT_TOK_RPAR { + $$ = OP2(DT_TOK_XLATE, dt_node_type($3), $6); + } + ; + +unary_expression: + postfix_expression + | DT_TOK_ADDADD unary_expression { $$ = OP1(DT_TOK_PREINC, $2); } + | DT_TOK_SUBSUB unary_expression { $$ = OP1(DT_TOK_PREDEC, $2); } + | unary_operator cast_expression { $$ = OP1($1, $2); } + | DT_TOK_SIZEOF unary_expression { $$ = OP1(DT_TOK_SIZEOF, $2); } + | DT_TOK_SIZEOF DT_TOK_LPAR type_name DT_TOK_RPAR { + $$ = OP1(DT_TOK_SIZEOF, dt_node_type($3)); + } + | DT_TOK_STRINGOF unary_expression { + $$ = OP1(DT_TOK_STRINGOF, $2); + } + ; + +unary_operator: DT_TOK_BAND { $$ = DT_TOK_ADDROF; } + | DT_TOK_MUL { $$ = DT_TOK_DEREF; } + | DT_TOK_ADD { $$ = 
DT_TOK_IPOS; } + | DT_TOK_SUB { $$ = DT_TOK_INEG; } + | DT_TOK_BNEG { $$ = DT_TOK_BNEG; } + | DT_TOK_LNEG { $$ = DT_TOK_LNEG; } + ; + +cast_expression: + unary_expression + | DT_TOK_LPAR type_name DT_TOK_RPAR cast_expression { + $$ = OP2(DT_TOK_LPAR, dt_node_type($2), $4); + } + ; + +multiplicative_expression: + cast_expression + | multiplicative_expression DT_TOK_MUL cast_expression { + $$ = OP2(DT_TOK_MUL, $1, $3); + } + | multiplicative_expression DT_TOK_DIV cast_expression { + $$ = OP2(DT_TOK_DIV, $1, $3); + } + | multiplicative_expression DT_TOK_MOD cast_expression { + $$ = OP2(DT_TOK_MOD, $1, $3); + } + ; + +additive_expression: + multiplicative_expression + | additive_expression DT_TOK_ADD multiplicative_expression { + $$ = OP2(DT_TOK_ADD, $1, $3); + } + | additive_expression DT_TOK_SUB multiplicative_expression { + $$ = OP2(DT_TOK_SUB, $1, $3); + } + ; + +shift_expression: + additive_expression + | shift_expression DT_TOK_LSH additive_expression { + $$ = OP2(DT_TOK_LSH, $1, $3); + } + | shift_expression DT_TOK_RSH additive_expression { + $$ = OP2(DT_TOK_RSH, $1, $3); + } + ; + +relational_expression: + shift_expression + | relational_expression DT_TOK_LT shift_expression { + $$ = OP2(DT_TOK_LT, $1, $3); + } + | relational_expression DT_TOK_GT shift_expression { + $$ = OP2(DT_TOK_GT, $1, $3); + } + | relational_expression DT_TOK_LE shift_expression { + $$ = OP2(DT_TOK_LE, $1, $3); + } + | relational_expression DT_TOK_GE shift_expression { + $$ = OP2(DT_TOK_GE, $1, $3); + } + ; + +equality_expression: + relational_expression + | equality_expression DT_TOK_EQU relational_expression { + $$ = OP2(DT_TOK_EQU, $1, $3); + } + | equality_expression DT_TOK_NEQ relational_expression { + $$ = OP2(DT_TOK_NEQ, $1, $3); + } + ; + +and_expression: + equality_expression + | and_expression DT_TOK_BAND equality_expression { + $$ = OP2(DT_TOK_BAND, $1, $3); + } + ; + +exclusive_or_expression: + and_expression + | exclusive_or_expression DT_TOK_XOR and_expression { + $$ = 
OP2(DT_TOK_XOR, $1, $3); + } + ; + +inclusive_or_expression: + exclusive_or_expression + | inclusive_or_expression DT_TOK_BOR exclusive_or_expression { + $$ = OP2(DT_TOK_BOR, $1, $3); + } + ; + +logical_and_expression: + inclusive_or_expression + | logical_and_expression DT_TOK_LAND inclusive_or_expression { + $$ = OP2(DT_TOK_LAND, $1, $3); + } + ; + +logical_xor_expression: + logical_and_expression + | logical_xor_expression DT_TOK_LXOR logical_and_expression { + $$ = OP2(DT_TOK_LXOR, $1, $3); + } + ; + +logical_or_expression: + logical_xor_expression + | logical_or_expression DT_TOK_LOR logical_xor_expression { + $$ = OP2(DT_TOK_LOR, $1, $3); + } + ; + +constant_expression: conditional_expression + ; + +conditional_expression: + logical_or_expression + | logical_or_expression DT_TOK_QUESTION expression DT_TOK_COLON + conditional_expression { $$ = OP3($1, $3, $5); } + ; + +assignment_expression: + conditional_expression + | unary_expression assignment_operator assignment_expression { + $$ = OP2($2, $1, $3); + } + ; + +assignment_operator: + DT_TOK_ASGN { $$ = DT_TOK_ASGN; } + | DT_TOK_MUL_EQ { $$ = DT_TOK_MUL_EQ; } + | DT_TOK_DIV_EQ { $$ = DT_TOK_DIV_EQ; } + | DT_TOK_MOD_EQ { $$ = DT_TOK_MOD_EQ; } + | DT_TOK_ADD_EQ { $$ = DT_TOK_ADD_EQ; } + | DT_TOK_SUB_EQ { $$ = DT_TOK_SUB_EQ; } + | DT_TOK_LSH_EQ { $$ = DT_TOK_LSH_EQ; } + | DT_TOK_RSH_EQ { $$ = DT_TOK_RSH_EQ; } + | DT_TOK_AND_EQ { $$ = DT_TOK_AND_EQ; } + | DT_TOK_XOR_EQ { $$ = DT_TOK_XOR_EQ; } + | DT_TOK_OR_EQ { $$ = DT_TOK_OR_EQ; } + ; + +expression: assignment_expression + | expression DT_TOK_COMMA assignment_expression { + $$ = OP2(DT_TOK_COMMA, $1, $3); + } + ; + +declaration: declaration_specifiers ';' { + $$ = dt_node_decl(); + dt_decl_free(dt_decl_pop()); + yybegin(YYS_CLAUSE); + } + | declaration_specifiers init_declarator_list ';' { + $$ = $2; + dt_decl_free(dt_decl_pop()); + yybegin(YYS_CLAUSE); + } + ; + +declaration_specifiers: + d_storage_class_specifier + | d_storage_class_specifier 
declaration_specifiers + | type_specifier + | type_specifier declaration_specifiers + | type_qualifier + | type_qualifier declaration_specifiers + ; + +parameter_declaration_specifiers: + storage_class_specifier + | storage_class_specifier declaration_specifiers + | type_specifier + | type_specifier declaration_specifiers + | type_qualifier + | type_qualifier declaration_specifiers + ; + +storage_class_specifier: + DT_KEY_AUTO { dt_decl_class(DT_DC_AUTO); } + | DT_KEY_REGISTER { dt_decl_class(DT_DC_REGISTER); } + | DT_KEY_STATIC { dt_decl_class(DT_DC_STATIC); } + | DT_KEY_EXTERN { dt_decl_class(DT_DC_EXTERN); } + | DT_KEY_TYPEDEF { dt_decl_class(DT_DC_TYPEDEF); } + ; + +d_storage_class_specifier: + storage_class_specifier + | DT_KEY_SELF { dt_decl_class(DT_DC_SELF); } + | DT_KEY_THIS { dt_decl_class(DT_DC_THIS); } + ; + +type_specifier: DT_KEY_VOID { $$ = dt_decl_spec(CTF_K_INTEGER, DUP("void")); } + | DT_KEY_CHAR { $$ = dt_decl_spec(CTF_K_INTEGER, DUP("char")); } + | DT_KEY_SHORT { $$ = dt_decl_attr(DT_DA_SHORT); } + | DT_KEY_INT { $$ = dt_decl_spec(CTF_K_INTEGER, DUP("int")); } + | DT_KEY_LONG { $$ = dt_decl_attr(DT_DA_LONG); } + | DT_KEY_FLOAT { $$ = dt_decl_spec(CTF_K_FLOAT, DUP("float")); } + | DT_KEY_DOUBLE { $$ = dt_decl_spec(CTF_K_FLOAT, DUP("double")); } + | DT_KEY_SIGNED { $$ = dt_decl_attr(DT_DA_SIGNED); } + | DT_KEY_UNSIGNED { $$ = dt_decl_attr(DT_DA_UNSIGNED); } + | DT_KEY_USERLAND { $$ = dt_decl_attr(DT_DA_USER); } + | DT_KEY_STRING { + $$ = dt_decl_spec(CTF_K_TYPEDEF, DUP("string")); + } + | DT_TOK_TNAME { $$ = dt_decl_spec(CTF_K_TYPEDEF, $1); } + | struct_or_union_specifier + | enum_specifier + ; + +type_qualifier: DT_KEY_CONST { $$ = dt_decl_attr(DT_DA_CONST); } + | DT_KEY_RESTRICT { $$ = dt_decl_attr(DT_DA_RESTRICT); } + | DT_KEY_VOLATILE { $$ = dt_decl_attr(DT_DA_VOLATILE); } + ; + +struct_or_union_specifier: + struct_or_union_definition struct_declaration_list '}' { + $$ = dt_scope_pop(); + } + | struct_or_union DT_TOK_IDENT { $$ = 
dt_decl_spec($1, $2); } + | struct_or_union DT_TOK_TNAME { $$ = dt_decl_spec($1, $2); } + ; + +struct_or_union_definition: + struct_or_union '{' { dt_decl_sou($1, NULL); } + | struct_or_union DT_TOK_IDENT '{' { dt_decl_sou($1, $2); } + | struct_or_union DT_TOK_TNAME '{' { dt_decl_sou($1, $2); } + ; + +struct_or_union: + DT_KEY_STRUCT { $$ = CTF_K_STRUCT; } + | DT_KEY_UNION { $$ = CTF_K_UNION; } + ; + +struct_declaration_list: + struct_declaration + | struct_declaration_list struct_declaration + ; + +init_declarator_list: + init_declarator + | init_declarator_list DT_TOK_COMMA init_declarator { + $$ = LINK($1, $3); + } + ; + +init_declarator: + declarator { + $$ = dt_node_decl(); + dt_decl_reset(); + } + ; + +struct_declaration: + specifier_qualifier_list struct_declarator_list ';' { + dt_decl_free(dt_decl_pop()); + } + ; + +specifier_qualifier_list: + type_specifier + | type_specifier specifier_qualifier_list { $$ = $2; } + | type_qualifier + | type_qualifier specifier_qualifier_list { $$ = $2; } + ; + +struct_declarator_list: + struct_declarator + | struct_declarator_list DT_TOK_COMMA struct_declarator + ; + +struct_declarator: + declarator { dt_decl_member(NULL); } + | DT_TOK_COLON constant_expression { dt_decl_member($2); } + | declarator DT_TOK_COLON constant_expression { + dt_decl_member($3); + } + ; + +enum_specifier: + enum_definition enumerator_list '}' { $$ = dt_scope_pop(); } + | DT_KEY_ENUM DT_TOK_IDENT { $$ = dt_decl_spec(CTF_K_ENUM, $2); } + | DT_KEY_ENUM DT_TOK_TNAME { $$ = dt_decl_spec(CTF_K_ENUM, $2); } + ; + +enum_definition: + DT_KEY_ENUM '{' { dt_decl_enum(NULL); } + | DT_KEY_ENUM DT_TOK_IDENT '{' { dt_decl_enum($2); } + | DT_KEY_ENUM DT_TOK_TNAME '{' { dt_decl_enum($2); } + ; + +enumerator_list: + enumerator + | enumerator_list DT_TOK_COMMA enumerator + ; + +enumerator: DT_TOK_IDENT { dt_decl_enumerator($1, NULL); } + | DT_TOK_IDENT DT_TOK_ASGN expression { + dt_decl_enumerator($1, $3); + } + ; + +declarator: direct_declarator + | pointer 
direct_declarator + ; + +direct_declarator: + DT_TOK_IDENT { $$ = dt_decl_ident($1); } + | lparen declarator DT_TOK_RPAR { $$ = $2; } + | direct_declarator array { dt_decl_array($2); } + | direct_declarator function { dt_decl_func($1, $2); } + ; + +lparen: DT_TOK_LPAR { dt_decl_top()->dd_attr |= DT_DA_PAREN; } + ; + +pointer: DT_TOK_MUL { $$ = dt_decl_ptr(); } + | DT_TOK_MUL type_qualifier_list { $$ = dt_decl_ptr(); } + | DT_TOK_MUL pointer { $$ = dt_decl_ptr(); } + | DT_TOK_MUL type_qualifier_list pointer { $$ = dt_decl_ptr(); } + ; + +type_qualifier_list: + type_qualifier + | type_qualifier_list type_qualifier { $$ = $2; } + ; + +parameter_type_list: + parameter_list + | DT_TOK_ELLIPSIS { $$ = dt_node_vatype(); } + | parameter_list DT_TOK_COMMA DT_TOK_ELLIPSIS { + $$ = LINK($1, dt_node_vatype()); + } + ; + +parameter_list: parameter_declaration + | parameter_list DT_TOK_COMMA parameter_declaration { + $$ = LINK($1, $3); + } + ; + +parameter_declaration: + parameter_declaration_specifiers { + $$ = dt_node_type(NULL); + } + | parameter_declaration_specifiers declarator { + $$ = dt_node_type(NULL); + } + | parameter_declaration_specifiers abstract_declarator { + $$ = dt_node_type(NULL); + } + ; + +type_name: specifier_qualifier_list { + $$ = dt_decl_pop(); + } + | specifier_qualifier_list abstract_declarator { + $$ = dt_decl_pop(); + } + ; + +abstract_declarator: + pointer + | direct_abstract_declarator + | pointer direct_abstract_declarator + ; + +direct_abstract_declarator: + lparen abstract_declarator DT_TOK_RPAR { $$ = $2; } + | direct_abstract_declarator array { dt_decl_array($2); } + | array { dt_decl_array($1); $$ = NULL; } + | direct_abstract_declarator function { dt_decl_func($1, $2); } + | function { dt_decl_func(NULL, $1); } + ; + +array: DT_TOK_LBRAC { dt_scope_push(NULL, CTF_ERR); } + array_parameters DT_TOK_RBRAC { + dt_scope_pop(); + $$ = $3; + } + ; + +array_parameters: + /* empty */ { $$ = NULL; } + | constant_expression { $$ = $1; } + | 
parameter_type_list { $$ = $1; } + ; + +function: DT_TOK_LPAR { dt_scope_push(NULL, CTF_ERR); } + function_parameters DT_TOK_RPAR { + dt_scope_pop(); + $$ = $3; + } + ; + +function_parameters: + /* empty */ { $$ = NULL; } + | parameter_type_list { $$ = $1; } + ; + +dtrace_keyword_ident: + DT_KEY_PROBE { $$ = DUP("probe"); } + | DT_KEY_PROVIDER { $$ = DUP("provider"); } + | DT_KEY_SELF { $$ = DUP("self"); } + | DT_KEY_STRING { $$ = DUP("string"); } + | DT_TOK_STRINGOF { $$ = DUP("stringof"); } + | DT_KEY_USERLAND { $$ = DUP("userland"); } + | DT_TOK_XLATE { $$ = DUP("xlate"); } + | DT_KEY_XLATOR { $$ = DUP("translator"); } + ; + +%% diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_handle.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_handle.c new file mode 100644 index 0000000..fe1ec7a --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_handle.c @@ -0,0 +1,485 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stddef.h> +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <unistd.h> +#include <assert.h> +#if defined(sun) +#include <alloca.h> +#endif + +#include <dt_impl.h> +#include <dt_program.h> + +static const char _dt_errprog[] = +"dtrace:::ERROR" +"{" +" trace(arg1);" +" trace(arg2);" +" trace(arg3);" +" trace(arg4);" +" trace(arg5);" +"}"; + +int +dtrace_handle_err(dtrace_hdl_t *dtp, dtrace_handle_err_f *hdlr, void *arg) +{ + dtrace_prog_t *pgp = NULL; + dt_stmt_t *stp; + dtrace_ecbdesc_t *edp; + + /* + * We don't currently support multiple error handlers. + */ + if (dtp->dt_errhdlr != NULL) + return (dt_set_errno(dtp, EALREADY)); + + /* + * If the DTRACEOPT_GRABANON is enabled, the anonymous enabling will + * already have a dtrace:::ERROR probe enabled; save 'hdlr' and 'arg' + * but do not bother compiling and enabling _dt_errprog. + */ + if (dtp->dt_options[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) + goto out; + + if ((pgp = dtrace_program_strcompile(dtp, _dt_errprog, + DTRACE_PROBESPEC_NAME, DTRACE_C_ZDEFS, 0, NULL)) == NULL) + return (dt_set_errno(dtp, dtrace_errno(dtp))); + + stp = dt_list_next(&pgp->dp_stmts); + assert(stp != NULL); + + edp = stp->ds_desc->dtsd_ecbdesc; + assert(edp != NULL); + edp->dted_uarg = DT_ECB_ERROR; + +out: + dtp->dt_errhdlr = hdlr; + dtp->dt_errarg = arg; + dtp->dt_errprog = pgp; + + return (0); +} + +int +dtrace_handle_drop(dtrace_hdl_t *dtp, dtrace_handle_drop_f *hdlr, void *arg) +{ + if (dtp->dt_drophdlr != NULL) + return (dt_set_errno(dtp, EALREADY)); + + dtp->dt_drophdlr = hdlr; + dtp->dt_droparg = arg; + + return (0); +} + +int +dtrace_handle_proc(dtrace_hdl_t *dtp, dtrace_handle_proc_f *hdlr, void *arg) +{ + if (dtp->dt_prochdlr != NULL) + return (dt_set_errno(dtp, EALREADY)); + + dtp->dt_prochdlr = hdlr; + dtp->dt_procarg = arg; + + return (0); +} + +int +dtrace_handle_buffered(dtrace_hdl_t *dtp, dtrace_handle_buffered_f *hdlr, + void *arg) +{ + if 
(dtp->dt_bufhdlr != NULL) + return (dt_set_errno(dtp, EALREADY)); + + if (hdlr == NULL) + return (dt_set_errno(dtp, EINVAL)); + + dtp->dt_bufhdlr = hdlr; + dtp->dt_bufarg = arg; + + return (0); +} + +int +dtrace_handle_setopt(dtrace_hdl_t *dtp, dtrace_handle_setopt_f *hdlr, + void *arg) +{ + if (hdlr == NULL) + return (dt_set_errno(dtp, EINVAL)); + + dtp->dt_setopthdlr = hdlr; + dtp->dt_setoptarg = arg; + + return (0); +} + +#define DT_REC(type, ndx) *((type *)((uintptr_t)data->dtpda_data + \ + epd->dtepd_rec[(ndx)].dtrd_offset)) + +static int +dt_handle_err(dtrace_hdl_t *dtp, dtrace_probedata_t *data) +{ + dtrace_eprobedesc_t *epd = data->dtpda_edesc, *errepd; + dtrace_probedesc_t *pd = data->dtpda_pdesc, *errpd; + dtrace_errdata_t err; + dtrace_epid_t epid; + + char where[30]; + char details[30]; + char offinfo[30]; + const int slop = 80; + const char *faultstr; + char *str; + int len; + + assert(epd->dtepd_uarg == DT_ECB_ERROR); + + if (epd->dtepd_nrecs != 5 || strcmp(pd->dtpd_provider, "dtrace") != 0 || + strcmp(pd->dtpd_name, "ERROR") != 0) + return (dt_set_errno(dtp, EDT_BADERROR)); + + /* + * This is an error. We have the following items here: EPID, + * faulting action, DIF offset, fault code and faulting address. 
+ */ + epid = (uint32_t)DT_REC(uint64_t, 0); + + if (dt_epid_lookup(dtp, epid, &errepd, &errpd) != 0) + return (dt_set_errno(dtp, EDT_BADERROR)); + + err.dteda_edesc = errepd; + err.dteda_pdesc = errpd; + err.dteda_cpu = data->dtpda_cpu; + err.dteda_action = (int)DT_REC(uint64_t, 1); + err.dteda_offset = (int)DT_REC(uint64_t, 2); + err.dteda_fault = (int)DT_REC(uint64_t, 3); + err.dteda_addr = DT_REC(uint64_t, 4); + + faultstr = dtrace_faultstr(dtp, err.dteda_fault); + len = sizeof (where) + sizeof (offinfo) + strlen(faultstr) + + strlen(errpd->dtpd_provider) + strlen(errpd->dtpd_mod) + + strlen(errpd->dtpd_name) + strlen(errpd->dtpd_func) + + slop; + + str = (char *)alloca(len); + + if (err.dteda_action == 0) { + (void) sprintf(where, "predicate"); + } else { + (void) sprintf(where, "action #%d", err.dteda_action); + } + + if (err.dteda_offset != -1) { + (void) sprintf(offinfo, " at DIF offset %d", err.dteda_offset); + } else { + offinfo[0] = 0; + } + + switch (err.dteda_fault) { + case DTRACEFLT_BADADDR: + case DTRACEFLT_BADALIGN: + case DTRACEFLT_BADSTACK: + (void) sprintf(details, " (0x%llx)", + (u_longlong_t)err.dteda_addr); + break; + + default: + details[0] = 0; + } + + (void) snprintf(str, len, "error on enabled probe ID %u " + "(ID %u: %s:%s:%s:%s): %s%s in %s%s\n", + epid, errpd->dtpd_id, errpd->dtpd_provider, + errpd->dtpd_mod, errpd->dtpd_func, + errpd->dtpd_name, dtrace_faultstr(dtp, err.dteda_fault), + details, where, offinfo); + + err.dteda_msg = str; + + if (dtp->dt_errhdlr == NULL) + return (dt_set_errno(dtp, EDT_ERRABORT)); + + if ((*dtp->dt_errhdlr)(&err, dtp->dt_errarg) == DTRACE_HANDLE_ABORT) + return (dt_set_errno(dtp, EDT_ERRABORT)); + + return (0); +} + +int +dt_handle_liberr(dtrace_hdl_t *dtp, const dtrace_probedata_t *data, + const char *faultstr) +{ + dtrace_probedesc_t *errpd = data->dtpda_pdesc; + dtrace_errdata_t err; + const int slop = 80; + char *str; + int len; + + err.dteda_edesc = data->dtpda_edesc; + err.dteda_pdesc = errpd; + 
err.dteda_cpu = data->dtpda_cpu; + err.dteda_action = -1; + err.dteda_offset = -1; + err.dteda_fault = DTRACEFLT_LIBRARY; + err.dteda_addr = 0; + + len = strlen(faultstr) + + strlen(errpd->dtpd_provider) + strlen(errpd->dtpd_mod) + + strlen(errpd->dtpd_name) + strlen(errpd->dtpd_func) + + slop; + + str = alloca(len); + + (void) snprintf(str, len, "error on enabled probe ID %u " + "(ID %u: %s:%s:%s:%s): %s\n", + data->dtpda_edesc->dtepd_epid, + errpd->dtpd_id, errpd->dtpd_provider, + errpd->dtpd_mod, errpd->dtpd_func, + errpd->dtpd_name, faultstr); + + err.dteda_msg = str; + + if (dtp->dt_errhdlr == NULL) + return (dt_set_errno(dtp, EDT_ERRABORT)); + + if ((*dtp->dt_errhdlr)(&err, dtp->dt_errarg) == DTRACE_HANDLE_ABORT) + return (dt_set_errno(dtp, EDT_ERRABORT)); + + return (0); +} + +#define DROPTAG(x) x, #x + +static const struct { + dtrace_dropkind_t dtdrg_kind; + char *dtdrg_tag; +} _dt_droptags[] = { + { DROPTAG(DTRACEDROP_PRINCIPAL) }, + { DROPTAG(DTRACEDROP_AGGREGATION) }, + { DROPTAG(DTRACEDROP_DYNAMIC) }, + { DROPTAG(DTRACEDROP_DYNRINSE) }, + { DROPTAG(DTRACEDROP_DYNDIRTY) }, + { DROPTAG(DTRACEDROP_SPEC) }, + { DROPTAG(DTRACEDROP_SPECBUSY) }, + { DROPTAG(DTRACEDROP_SPECUNAVAIL) }, + { DROPTAG(DTRACEDROP_DBLERROR) }, + { DROPTAG(DTRACEDROP_STKSTROVERFLOW) }, + { 0, NULL } +}; + +static const char * +dt_droptag(dtrace_dropkind_t kind) +{ + int i; + + for (i = 0; _dt_droptags[i].dtdrg_tag != NULL; i++) { + if (_dt_droptags[i].dtdrg_kind == kind) + return (_dt_droptags[i].dtdrg_tag); + } + + return ("DTRACEDROP_UNKNOWN"); +} + +int +dt_handle_cpudrop(dtrace_hdl_t *dtp, processorid_t cpu, + dtrace_dropkind_t what, uint64_t howmany) +{ + dtrace_dropdata_t drop; + char str[80], *s; + int size; + + assert(what == DTRACEDROP_PRINCIPAL || what == DTRACEDROP_AGGREGATION); + + bzero(&drop, sizeof (drop)); + drop.dtdda_handle = dtp; + drop.dtdda_cpu = cpu; + drop.dtdda_kind = what; + drop.dtdda_drops = howmany; + drop.dtdda_msg = str; + + if (dtp->dt_droptags) { + 
(void) snprintf(str, sizeof (str), "[%s] ", dt_droptag(what)); + s = &str[strlen(str)]; + size = sizeof (str) - (s - str); + } else { + s = str; + size = sizeof (str); + } + + (void) snprintf(s, size, "%llu %sdrop%s on CPU %d\n", + (u_longlong_t)howmany, + what == DTRACEDROP_PRINCIPAL ? "" : "aggregation ", + howmany > 1 ? "s" : "", cpu); + + if (dtp->dt_drophdlr == NULL) + return (dt_set_errno(dtp, EDT_DROPABORT)); + + if ((*dtp->dt_drophdlr)(&drop, dtp->dt_droparg) == DTRACE_HANDLE_ABORT) + return (dt_set_errno(dtp, EDT_DROPABORT)); + + return (0); +} + +static const struct { + dtrace_dropkind_t dtdrt_kind; + uintptr_t dtdrt_offset; + const char *dtdrt_str; + const char *dtdrt_msg; +} _dt_droptab[] = { + { DTRACEDROP_DYNAMIC, + offsetof(dtrace_status_t, dtst_dyndrops), + "dynamic variable drop" }, + + { DTRACEDROP_DYNRINSE, + offsetof(dtrace_status_t, dtst_dyndrops_rinsing), + "dynamic variable drop", " with non-empty rinsing list" }, + + { DTRACEDROP_DYNDIRTY, + offsetof(dtrace_status_t, dtst_dyndrops_dirty), + "dynamic variable drop", " with non-empty dirty list" }, + + { DTRACEDROP_SPEC, + offsetof(dtrace_status_t, dtst_specdrops), + "speculative drop" }, + + { DTRACEDROP_SPECBUSY, + offsetof(dtrace_status_t, dtst_specdrops_busy), + "failed speculation", " (available buffer(s) still busy)" }, + + { DTRACEDROP_SPECUNAVAIL, + offsetof(dtrace_status_t, dtst_specdrops_unavail), + "failed speculation", " (no speculative buffer available)" }, + + { DTRACEDROP_STKSTROVERFLOW, + offsetof(dtrace_status_t, dtst_stkstroverflows), + "jstack()/ustack() string table overflow" }, + + { DTRACEDROP_DBLERROR, + offsetof(dtrace_status_t, dtst_dblerrors), + "error", " in ERROR probe enabling" }, + + { 0, 0, NULL } +}; + +int +dt_handle_status(dtrace_hdl_t *dtp, dtrace_status_t *old, dtrace_status_t *new) +{ + dtrace_dropdata_t drop; + char str[80], *s; + uintptr_t base = (uintptr_t)new, obase = (uintptr_t)old; + int i, size; + + bzero(&drop, sizeof (drop)); + drop.dtdda_handle = 
dtp; + drop.dtdda_cpu = DTRACE_CPUALL; + drop.dtdda_msg = str; + + /* + * First, check to see if we've been killed -- in which case we abort. + */ + if (new->dtst_killed && !old->dtst_killed) + return (dt_set_errno(dtp, EDT_BRICKED)); + + for (i = 0; _dt_droptab[i].dtdrt_str != NULL; i++) { + uintptr_t naddr = base + _dt_droptab[i].dtdrt_offset; + uintptr_t oaddr = obase + _dt_droptab[i].dtdrt_offset; + + uint64_t nval = *((uint64_t *)naddr); + uint64_t oval = *((uint64_t *)oaddr); + + if (nval == oval) + continue; + + if (dtp->dt_droptags) { + (void) snprintf(str, sizeof (str), "[%s] ", + dt_droptag(_dt_droptab[i].dtdrt_kind)); + s = &str[strlen(str)]; + size = sizeof (str) - (s - str); + } else { + s = str; + size = sizeof (str); + } + + (void) snprintf(s, size, "%llu %s%s%s\n", + (u_longlong_t)(nval - oval), + _dt_droptab[i].dtdrt_str, (nval - oval > 1) ? "s" : "", + _dt_droptab[i].dtdrt_msg != NULL ? + _dt_droptab[i].dtdrt_msg : ""); + + drop.dtdda_kind = _dt_droptab[i].dtdrt_kind; + drop.dtdda_total = nval; + drop.dtdda_drops = nval - oval; + + if (dtp->dt_drophdlr == NULL) + return (dt_set_errno(dtp, EDT_DROPABORT)); + + if ((*dtp->dt_drophdlr)(&drop, + dtp->dt_droparg) == DTRACE_HANDLE_ABORT) + return (dt_set_errno(dtp, EDT_DROPABORT)); + } + + return (0); +} + +int +dt_handle_setopt(dtrace_hdl_t *dtp, dtrace_setoptdata_t *data) +{ + void *arg = dtp->dt_setoptarg; + + if (dtp->dt_setopthdlr == NULL) + return (0); + + if ((*dtp->dt_setopthdlr)(data, arg) == DTRACE_HANDLE_ABORT) + return (dt_set_errno(dtp, EDT_DIRABORT)); + + return (0); +} + +int +dt_handle(dtrace_hdl_t *dtp, dtrace_probedata_t *data) +{ + dtrace_eprobedesc_t *epd = data->dtpda_edesc; + int rval; + + switch (epd->dtepd_uarg) { + case DT_ECB_ERROR: + rval = dt_handle_err(dtp, data); + break; + + default: + return (DTRACE_CONSUME_THIS); + } + + if (rval == 0) + return (DTRACE_CONSUME_NEXT); + + return (DTRACE_CONSUME_ERROR); +} diff --git 
a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_ident.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_ident.c new file mode 100644 index 0000000..5a2f0e4 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_ident.c @@ -0,0 +1,1052 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(sun) +#include <sys/sysmacros.h> +#endif +#include <strings.h> +#include <stdlib.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <assert.h> +#include <errno.h> +#include <ctype.h> +#if defined(sun) +#include <sys/procfs_isa.h> +#endif +#include <limits.h> + +#include <dt_ident.h> +#include <dt_parser.h> +#include <dt_provider.h> +#include <dt_strtab.h> +#include <dt_impl.h> + +/* + * Common code for cooking an identifier that uses a typed signature list (we + * use this for associative arrays and functions). If the argument list is + * of the same length and types, then return the return type. 
Otherwise + * print an appropriate compiler error message and abort the compile. + */ +static void +dt_idcook_sign(dt_node_t *dnp, dt_ident_t *idp, + int argc, dt_node_t *args, const char *prefix, const char *suffix) +{ + dt_idsig_t *isp = idp->di_data; + int i, compat, mismatch, arglimit, iskey; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + iskey = idp->di_kind == DT_IDENT_ARRAY || idp->di_kind == DT_IDENT_AGG; + + if (isp->dis_varargs >= 0) { + mismatch = argc < isp->dis_varargs; + arglimit = isp->dis_varargs; + } else if (isp->dis_optargs >= 0) { + mismatch = (argc < isp->dis_optargs || argc > isp->dis_argc); + arglimit = argc; + } else { + mismatch = argc != isp->dis_argc; + arglimit = isp->dis_argc; + } + + if (mismatch) { + xyerror(D_PROTO_LEN, "%s%s%s prototype mismatch: %d %s%s" + "passed, %s%d expected\n", prefix, idp->di_name, suffix, + argc, iskey ? "key" : "arg", argc == 1 ? " " : "s ", + isp->dis_optargs >= 0 ? "at least " : "", + isp->dis_optargs >= 0 ? isp->dis_optargs : arglimit); + } + + for (i = 0; i < arglimit; i++, args = args->dn_list) { + if (isp->dis_args[i].dn_ctfp != NULL) + compat = dt_node_is_argcompat(&isp->dis_args[i], args); + else + compat = 1; /* "@" matches any type */ + + if (!compat) { + xyerror(D_PROTO_ARG, + "%s%s%s %s #%d is incompatible with " + "prototype:\n\tprototype: %s\n\t%9s: %s\n", + prefix, idp->di_name, suffix, + iskey ? "key" : "argument", i + 1, + dt_node_type_name(&isp->dis_args[i], n1, + sizeof (n1)), + iskey ? "key" : "argument", + dt_node_type_name(args, n2, sizeof (n2))); + } + } + + dt_node_type_assign(dnp, idp->di_ctfp, idp->di_type, B_FALSE); +} + +/* + * Cook an associative array identifier. If this is the first time we are + * cooking this array, create its signature based on the argument list. + * Otherwise validate the argument list against the existing signature. 
+ */ +static void +dt_idcook_assc(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *args) +{ + if (idp->di_data == NULL) { + dt_idsig_t *isp = idp->di_data = malloc(sizeof (dt_idsig_t)); + char n[DT_TYPE_NAMELEN]; + int i; + + if (isp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + isp->dis_varargs = -1; + isp->dis_optargs = -1; + isp->dis_argc = argc; + isp->dis_args = NULL; + isp->dis_auxinfo = 0; + + if (argc != 0 && (isp->dis_args = calloc(argc, + sizeof (dt_node_t))) == NULL) { + idp->di_data = NULL; + free(isp); + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + /* + * If this identifier has not been explicitly declared earlier, + * set the identifier's base type to be our special type <DYN>. + * If this ident is an aggregation, it will remain as is. If + * this ident is an associative array, it will be reassigned + * based on the result type of the first assignment statement. + */ + if (!(idp->di_flags & DT_IDFLG_DECL)) { + idp->di_ctfp = DT_DYN_CTFP(yypcb->pcb_hdl); + idp->di_type = DT_DYN_TYPE(yypcb->pcb_hdl); + } + + for (i = 0; i < argc; i++, args = args->dn_list) { + if (dt_node_is_dynamic(args) || dt_node_is_void(args)) { + xyerror(D_KEY_TYPE, "%s expression may not be " + "used as %s index: key #%d\n", + dt_node_type_name(args, n, sizeof (n)), + dt_idkind_name(idp->di_kind), i + 1); + } + + dt_node_type_propagate(args, &isp->dis_args[i]); + isp->dis_args[i].dn_list = &isp->dis_args[i + 1]; + } + + if (argc != 0) + isp->dis_args[argc - 1].dn_list = NULL; + + dt_node_type_assign(dnp, idp->di_ctfp, idp->di_type, B_FALSE); + + } else { + dt_idcook_sign(dnp, idp, argc, args, + idp->di_kind == DT_IDENT_AGG ? "@" : "", "[ ]"); + } +} + +/* + * Cook a function call. If this is the first time we are cooking this + * identifier, create its type signature based on predefined prototype stored + * in di_iarg. We then validate the argument list against this signature. 
+ */ +static void +dt_idcook_func(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *args) +{ + if (idp->di_data == NULL) { + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dtrace_typeinfo_t dtt; + dt_idsig_t *isp; + char *s, *p1, *p2; + int i = 0; + + assert(idp->di_iarg != NULL); + s = alloca(strlen(idp->di_iarg) + 1); + (void) strcpy(s, idp->di_iarg); + + if ((p2 = strrchr(s, ')')) != NULL) + *p2 = '\0'; /* mark end of parameter list string */ + + if ((p1 = strchr(s, '(')) != NULL) + *p1++ = '\0'; /* mark end of return type string */ + + if (p1 == NULL || p2 == NULL) { + xyerror(D_UNKNOWN, "internal error: malformed entry " + "for built-in function %s\n", idp->di_name); + } + + for (p2 = p1; *p2 != '\0'; p2++) { + if (!isspace(*p2)) { + i++; + break; + } + } + + for (p2 = strchr(p2, ','); p2++ != NULL; i++) + p2 = strchr(p2, ','); + + /* + * We first allocate a new ident signature structure with the + * appropriate number of argument entries, and then look up + * the return type and store its CTF data in di_ctfp/type. + */ + if ((isp = idp->di_data = malloc(sizeof (dt_idsig_t))) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + isp->dis_varargs = -1; + isp->dis_optargs = -1; + isp->dis_argc = i; + isp->dis_args = NULL; + isp->dis_auxinfo = 0; + + if (i != 0 && (isp->dis_args = calloc(i, + sizeof (dt_node_t))) == NULL) { + idp->di_data = NULL; + free(isp); + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + if (dt_type_lookup(s, &dtt) == -1) { + xyerror(D_UNKNOWN, "failed to resolve type of %s (%s):" + " %s\n", idp->di_name, s, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + if (idp->di_kind == DT_IDENT_AGGFUNC) { + idp->di_ctfp = DT_DYN_CTFP(dtp); + idp->di_type = DT_DYN_TYPE(dtp); + } else { + idp->di_ctfp = dtt.dtt_ctfp; + idp->di_type = dtt.dtt_type; + } + + /* + * For each comma-delimited parameter in the prototype string, + * we look up the corresponding type and store its CTF data in + * the corresponding location in dis_args[]. 
We also recognize + * the special type string "@" to indicate that the specified + * parameter may be a D expression of *any* type (represented + * as a dis_args[] element with ctfp = NULL, type == CTF_ERR). + * If a varargs "..." is present, we record the argument index + * in dis_varargs for the benefit of dt_idcook_sign(), above. + * If the type of an argument is enclosed in square brackets + * (e.g. "[int]"), the argument is considered optional: the + * argument may be absent, but if it is present, it must be of + * the specified type. Note that varargs may not optional, + * optional arguments may not follow varargs, and non-optional + * arguments may not follow optional arguments. + */ + for (i = 0; i < isp->dis_argc; i++, p1 = p2) { + while (isspace(*p1)) + p1++; /* skip leading whitespace */ + + if ((p2 = strchr(p1, ',')) == NULL) + p2 = p1 + strlen(p1); + else + *p2++ = '\0'; + + if (strcmp(p1, "@") == 0 || strcmp(p1, "...") == 0) { + isp->dis_args[i].dn_ctfp = NULL; + isp->dis_args[i].dn_type = CTF_ERR; + if (*p1 == '.') + isp->dis_varargs = i; + continue; + } + + if (*p1 == '[' && p1[strlen(p1) - 1] == ']') { + if (isp->dis_varargs != -1) { + xyerror(D_UNKNOWN, "optional arg#%d " + "may not follow variable arg#%d\n", + i + 1, isp->dis_varargs + 1); + } + + if (isp->dis_optargs == -1) + isp->dis_optargs = i; + + p1[strlen(p1) - 1] = '\0'; + p1++; + } else if (isp->dis_optargs != -1) { + xyerror(D_UNKNOWN, "required arg#%d may not " + "follow optional arg#%d\n", i + 1, + isp->dis_optargs + 1); + } + + if (dt_type_lookup(p1, &dtt) == -1) { + xyerror(D_UNKNOWN, "failed to resolve type of " + "%s arg#%d (%s): %s\n", idp->di_name, i + 1, + p1, dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + dt_node_type_assign(&isp->dis_args[i], + dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); + } + } + + dt_idcook_sign(dnp, idp, argc, args, "", "( )"); +} + +/* + * Cook a reference to the dynamically typed args[] array. 
We verify that the + * reference is using a single integer constant, and then construct a new ident + * representing the appropriate type or translation specifically for this node. + */ +static void +dt_idcook_args(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *ap) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_probe_t *prp = yypcb->pcb_probe; + + dt_node_t tag, *nnp, *xnp; + dt_xlator_t *dxp; + dt_ident_t *xidp; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + if (argc != 1) { + xyerror(D_PROTO_LEN, "%s[ ] prototype mismatch: %d arg%s" + "passed, 1 expected\n", idp->di_name, argc, + argc == 1 ? " " : "s "); + } + + if (ap->dn_kind != DT_NODE_INT) { + xyerror(D_PROTO_ARG, "%s[ ] argument #1 is incompatible with " + "prototype:\n\tprototype: %s\n\t argument: %s\n", + idp->di_name, "integer constant", + dt_type_name(ap->dn_ctfp, ap->dn_type, n1, sizeof (n1))); + } + + if (yypcb->pcb_pdesc == NULL) { + xyerror(D_ARGS_NONE, "%s[ ] may not be referenced outside " + "of a probe clause\n", idp->di_name); + } + + if (prp == NULL) { + xyerror(D_ARGS_MULTI, + "%s[ ] may not be referenced because probe description %s " + "matches an unstable set of probes\n", idp->di_name, + dtrace_desc2str(yypcb->pcb_pdesc, n1, sizeof (n1))); + } + + if (ap->dn_value >= prp->pr_argc) { + xyerror(D_ARGS_IDX, "index %lld is out of range for %s %s[ ]\n", + (longlong_t)ap->dn_value, dtrace_desc2str(yypcb->pcb_pdesc, + n1, sizeof (n1)), idp->di_name); + } + + /* + * Look up the native and translated argument types for the probe. + * If no translation is needed, these will be the same underlying node. + * If translation is needed, look up the appropriate translator. Once + * we have the appropriate node, create a new dt_ident_t for this node, + * assign it the appropriate attributes, and set the type of 'dnp'. 
+ */ + xnp = prp->pr_xargv[ap->dn_value]; + nnp = prp->pr_nargv[prp->pr_mapping[ap->dn_value]]; + + if (xnp->dn_type == CTF_ERR) { + xyerror(D_ARGS_TYPE, "failed to resolve translated type for " + "%s[%lld]\n", idp->di_name, (longlong_t)ap->dn_value); + } + + if (nnp->dn_type == CTF_ERR) { + xyerror(D_ARGS_TYPE, "failed to resolve native type for " + "%s[%lld]\n", idp->di_name, (longlong_t)ap->dn_value); + } + + if (dtp->dt_xlatemode == DT_XL_STATIC && ( + nnp == xnp || dt_node_is_argcompat(nnp, xnp))) { + dnp->dn_ident = dt_ident_create(idp->di_name, idp->di_kind, + idp->di_flags | DT_IDFLG_ORPHAN, idp->di_id, idp->di_attr, + idp->di_vers, idp->di_ops, idp->di_iarg, idp->di_gen); + + if (dnp->dn_ident == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dt_node_type_assign(dnp, + prp->pr_argv[ap->dn_value].dtt_ctfp, + prp->pr_argv[ap->dn_value].dtt_type, + prp->pr_argv[ap->dn_value].dtt_flags & DTT_FL_USER ? + B_TRUE : B_FALSE); + + } else if ((dxp = dt_xlator_lookup(dtp, + nnp, xnp, DT_XLATE_FUZZY)) != NULL || ( + dxp = dt_xlator_lookup(dtp, dt_probe_tag(prp, ap->dn_value, &tag), + xnp, DT_XLATE_EXACT | DT_XLATE_EXTERN)) != NULL) { + + xidp = dt_xlator_ident(dxp, xnp->dn_ctfp, xnp->dn_type); + + dnp->dn_ident = dt_ident_create(idp->di_name, xidp->di_kind, + xidp->di_flags | DT_IDFLG_ORPHAN, idp->di_id, idp->di_attr, + idp->di_vers, idp->di_ops, idp->di_iarg, idp->di_gen); + + if (dnp->dn_ident == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (dt_xlator_dynamic(dxp)) + dxp->dx_arg = (int)ap->dn_value; + + /* + * Propagate relevant members from the translator's internal + * dt_ident_t. This code must be kept in sync with the state + * that is initialized for idents in dt_xlator_create(). 
+ */ + dnp->dn_ident->di_data = xidp->di_data; + dnp->dn_ident->di_ctfp = xidp->di_ctfp; + dnp->dn_ident->di_type = xidp->di_type; + + dt_node_type_assign(dnp, DT_DYN_CTFP(dtp), DT_DYN_TYPE(dtp), + B_FALSE); + + } else { + xyerror(D_ARGS_XLATOR, "translator for %s[%lld] from %s to %s " + "is not defined\n", idp->di_name, (longlong_t)ap->dn_value, + dt_node_type_name(nnp, n1, sizeof (n1)), + dt_node_type_name(xnp, n2, sizeof (n2))); + } + + assert(dnp->dn_ident->di_flags & DT_IDFLG_ORPHAN); + assert(dnp->dn_ident->di_id == idp->di_id); +} + +static void +dt_idcook_regs(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *ap) +{ + dtrace_typeinfo_t dtt; + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + char n[DT_TYPE_NAMELEN]; + + if (argc != 1) { + xyerror(D_PROTO_LEN, "%s[ ] prototype mismatch: %d arg%s" + "passed, 1 expected\n", idp->di_name, + argc, argc == 1 ? " " : "s "); + } + + if (ap->dn_kind != DT_NODE_INT) { + xyerror(D_PROTO_ARG, "%s[ ] argument #1 is incompatible with " + "prototype:\n\tprototype: %s\n\t argument: %s\n", + idp->di_name, "integer constant", + dt_type_name(ap->dn_ctfp, ap->dn_type, n, sizeof (n))); + } + + if ((ap->dn_flags & DT_NF_SIGNED) && (int64_t)ap->dn_value < 0) { + xyerror(D_REGS_IDX, "index %lld is out of range for array %s\n", + (longlong_t)ap->dn_value, idp->di_name); + } + + if (dt_type_lookup("uint64_t", &dtt) == -1) { + xyerror(D_UNKNOWN, "failed to resolve type of %s: %s\n", + idp->di_name, dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + idp->di_ctfp = dtt.dtt_ctfp; + idp->di_type = dtt.dtt_type; + + dt_node_type_assign(dnp, idp->di_ctfp, idp->di_type, B_FALSE); +} + +/*ARGSUSED*/ +static void +dt_idcook_type(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *args) +{ + if (idp->di_type == CTF_ERR) { + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dtrace_typeinfo_t dtt; + + if (dt_type_lookup(idp->di_iarg, &dtt) == -1) { + xyerror(D_UNKNOWN, + "failed to resolve type %s for identifier %s: %s\n", + (const char *)idp->di_iarg, idp->di_name, 
+ dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + idp->di_ctfp = dtt.dtt_ctfp; + idp->di_type = dtt.dtt_type; + } + + dt_node_type_assign(dnp, idp->di_ctfp, idp->di_type, B_FALSE); +} + +/*ARGSUSED*/ +static void +dt_idcook_thaw(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *args) +{ + if (idp->di_ctfp != NULL && idp->di_type != CTF_ERR) + dt_node_type_assign(dnp, idp->di_ctfp, idp->di_type, B_FALSE); +} + +static void +dt_idcook_inline(dt_node_t *dnp, dt_ident_t *idp, int argc, dt_node_t *args) +{ + if (idp->di_kind == DT_IDENT_ARRAY) + dt_idcook_assc(dnp, idp, argc, args); + else + dt_idcook_thaw(dnp, idp, argc, args); +} + +static void +dt_iddtor_sign(dt_ident_t *idp) +{ + if (idp->di_data != NULL) + free(((dt_idsig_t *)idp->di_data)->dis_args); + free(idp->di_data); +} + +static void +dt_iddtor_free(dt_ident_t *idp) +{ + free(idp->di_data); +} + +static void +dt_iddtor_inline(dt_ident_t *idp) +{ + dt_idnode_t *inp = idp->di_iarg; + + if (inp != NULL) { + dt_node_link_free(&inp->din_list); + + if (inp->din_hash != NULL) + dt_idhash_destroy(inp->din_hash); + + free(inp->din_argv); + free(inp); + } + + if (idp->di_kind == DT_IDENT_ARRAY) + dt_iddtor_sign(idp); + else + dt_iddtor_free(idp); +} + +/*ARGSUSED*/ +static void +dt_iddtor_none(dt_ident_t *idp) +{ + /* do nothing */ +} + +static void +dt_iddtor_probe(dt_ident_t *idp) +{ + if (idp->di_data != NULL) + dt_probe_destroy(idp->di_data); +} + +static size_t +dt_idsize_type(dt_ident_t *idp) +{ + return (ctf_type_size(idp->di_ctfp, idp->di_type)); +} + +/*ARGSUSED*/ +static size_t +dt_idsize_none(dt_ident_t *idp) +{ + return (0); +} + +const dt_idops_t dt_idops_assc = { + dt_idcook_assc, + dt_iddtor_sign, + dt_idsize_none, +}; + +const dt_idops_t dt_idops_func = { + dt_idcook_func, + dt_iddtor_sign, + dt_idsize_none, +}; + +const dt_idops_t dt_idops_args = { + dt_idcook_args, + dt_iddtor_none, + dt_idsize_none, +}; + +const dt_idops_t dt_idops_regs = { + dt_idcook_regs, + dt_iddtor_free, + dt_idsize_none, 
+}; + +const dt_idops_t dt_idops_type = { + dt_idcook_type, + dt_iddtor_free, + dt_idsize_type, +}; + +const dt_idops_t dt_idops_thaw = { + dt_idcook_thaw, + dt_iddtor_free, + dt_idsize_type, +}; + +const dt_idops_t dt_idops_inline = { + dt_idcook_inline, + dt_iddtor_inline, + dt_idsize_type, +}; + +const dt_idops_t dt_idops_probe = { + dt_idcook_thaw, + dt_iddtor_probe, + dt_idsize_none, +}; + +static void +dt_idhash_populate(dt_idhash_t *dhp) +{ + const dt_ident_t *idp = dhp->dh_tmpl; + + dhp->dh_tmpl = NULL; /* clear dh_tmpl first to avoid recursion */ + dt_dprintf("populating %s idhash from %p\n", dhp->dh_name, (void *)idp); + + for (; idp->di_name != NULL; idp++) { + if (dt_idhash_insert(dhp, idp->di_name, + idp->di_kind, idp->di_flags, idp->di_id, idp->di_attr, + idp->di_vers, idp->di_ops ? idp->di_ops : &dt_idops_thaw, + idp->di_iarg, 0) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } +} + +dt_idhash_t * +dt_idhash_create(const char *name, const dt_ident_t *tmpl, + uint_t min, uint_t max) +{ + dt_idhash_t *dhp; + size_t size; + + assert(min <= max); + + size = sizeof (dt_idhash_t) + + sizeof (dt_ident_t *) * (_dtrace_strbuckets - 1); + + if ((dhp = malloc(size)) == NULL) + return (NULL); + + bzero(dhp, size); + dhp->dh_name = name; + dhp->dh_tmpl = tmpl; + dhp->dh_nextid = min; + dhp->dh_minid = min; + dhp->dh_maxid = max; + dhp->dh_hashsz = _dtrace_strbuckets; + + return (dhp); +} + +/* + * Destroy an entire identifier hash. This must be done using two passes with + * an inlined version of dt_ident_destroy() to avoid referencing freed memory. + * In the first pass di_dtor() is called for all identifiers; then the second + * pass frees the actual dt_ident_t's. These must be done separately because + * a di_dtor() may operate on data structures which contain references to other + * identifiers inside of this hash itself (e.g. a global inline definition + * which contains a parse tree that refers to another global variable). 
+ */ +void +dt_idhash_destroy(dt_idhash_t *dhp) +{ + dt_ident_t *idp, *next; + ulong_t i; + + for (i = 0; i < dhp->dh_hashsz; i++) { + for (idp = dhp->dh_hash[i]; idp != NULL; idp = next) { + next = idp->di_next; + idp->di_ops->di_dtor(idp); + } + } + + for (i = 0; i < dhp->dh_hashsz; i++) { + for (idp = dhp->dh_hash[i]; idp != NULL; idp = next) { + next = idp->di_next; + free(idp->di_name); + free(idp); + } + } + + free(dhp); +} + +void +dt_idhash_update(dt_idhash_t *dhp) +{ + uint_t nextid = dhp->dh_minid; + dt_ident_t *idp; + ulong_t i; + + for (i = 0; i < dhp->dh_hashsz; i++) { + for (idp = dhp->dh_hash[i]; idp != NULL; idp = idp->di_next) { + /* + * Right now we're hard coding which types need to be + * reset, but ideally this would be done dynamically. + */ + if (idp->di_kind == DT_IDENT_ARRAY || + idp->di_kind == DT_IDENT_SCALAR || + idp->di_kind == DT_IDENT_AGG) + nextid = MAX(nextid, idp->di_id + 1); + } + } + + dhp->dh_nextid = nextid; +} + +dt_ident_t * +dt_idhash_lookup(dt_idhash_t *dhp, const char *name) +{ + size_t len; + ulong_t h = dt_strtab_hash(name, &len) % dhp->dh_hashsz; + dt_ident_t *idp; + + if (dhp->dh_tmpl != NULL) + dt_idhash_populate(dhp); /* fill hash w/ initial population */ + + for (idp = dhp->dh_hash[h]; idp != NULL; idp = idp->di_next) { + if (strcmp(idp->di_name, name) == 0) + return (idp); + } + + return (NULL); +} + +int +dt_idhash_nextid(dt_idhash_t *dhp, uint_t *p) +{ + if (dhp->dh_nextid >= dhp->dh_maxid) + return (-1); /* no more id's are free to allocate */ + + *p = dhp->dh_nextid++; + return (0); +} + +ulong_t +dt_idhash_size(const dt_idhash_t *dhp) +{ + return (dhp->dh_nelems); +} + +const char * +dt_idhash_name(const dt_idhash_t *dhp) +{ + return (dhp->dh_name); +} + +dt_ident_t * +dt_idhash_insert(dt_idhash_t *dhp, const char *name, ushort_t kind, + ushort_t flags, uint_t id, dtrace_attribute_t attr, uint_t vers, + const dt_idops_t *ops, void *iarg, ulong_t gen) +{ + dt_ident_t *idp; + ulong_t h; + + if (dhp->dh_tmpl != 
NULL) + dt_idhash_populate(dhp); /* fill hash w/ initial population */ + + idp = dt_ident_create(name, kind, flags, id, + attr, vers, ops, iarg, gen); + + if (idp == NULL) + return (NULL); + + h = dt_strtab_hash(name, NULL) % dhp->dh_hashsz; + idp->di_next = dhp->dh_hash[h]; + + dhp->dh_hash[h] = idp; + dhp->dh_nelems++; + + if (dhp->dh_defer != NULL) + dhp->dh_defer(dhp, idp); + + return (idp); +} + +void +dt_idhash_xinsert(dt_idhash_t *dhp, dt_ident_t *idp) +{ + ulong_t h; + + if (dhp->dh_tmpl != NULL) + dt_idhash_populate(dhp); /* fill hash w/ initial population */ + + h = dt_strtab_hash(idp->di_name, NULL) % dhp->dh_hashsz; + idp->di_next = dhp->dh_hash[h]; + idp->di_flags &= ~DT_IDFLG_ORPHAN; + + dhp->dh_hash[h] = idp; + dhp->dh_nelems++; + + if (dhp->dh_defer != NULL) + dhp->dh_defer(dhp, idp); +} + +void +dt_idhash_delete(dt_idhash_t *dhp, dt_ident_t *key) +{ + size_t len; + ulong_t h = dt_strtab_hash(key->di_name, &len) % dhp->dh_hashsz; + dt_ident_t **pp = &dhp->dh_hash[h]; + dt_ident_t *idp; + + for (idp = dhp->dh_hash[h]; idp != NULL; idp = idp->di_next) { + if (idp == key) + break; + else + pp = &idp->di_next; + } + + assert(idp == key); + *pp = idp->di_next; + + assert(dhp->dh_nelems != 0); + dhp->dh_nelems--; + + if (!(idp->di_flags & DT_IDFLG_ORPHAN)) + dt_ident_destroy(idp); +} + +static int +dt_idhash_comp(const void *lp, const void *rp) +{ + const dt_ident_t *lhs = *((const dt_ident_t **)lp); + const dt_ident_t *rhs = *((const dt_ident_t **)rp); + + if (lhs->di_id != rhs->di_id) + return ((int)(lhs->di_id - rhs->di_id)); + else + return (strcmp(lhs->di_name, rhs->di_name)); +} + +int +dt_idhash_iter(dt_idhash_t *dhp, dt_idhash_f *func, void *data) +{ + dt_ident_t **ids; + dt_ident_t *idp; + ulong_t i, j, n; + int rv; + + if (dhp->dh_tmpl != NULL) + dt_idhash_populate(dhp); /* fill hash w/ initial population */ + + n = dhp->dh_nelems; + ids = alloca(sizeof (dt_ident_t *) * n); + + for (i = 0, j = 0; i < dhp->dh_hashsz; i++) { + for (idp = 
dhp->dh_hash[i]; idp != NULL; idp = idp->di_next) + ids[j++] = idp; + } + + qsort(ids, dhp->dh_nelems, sizeof (dt_ident_t *), dt_idhash_comp); + + for (i = 0; i < n; i++) { + if ((rv = func(dhp, ids[i], data)) != 0) + return (rv); + } + + return (0); +} + +dt_ident_t * +dt_idstack_lookup(dt_idstack_t *sp, const char *name) +{ + dt_idhash_t *dhp; + dt_ident_t *idp; + + for (dhp = dt_list_prev(&sp->dids_list); + dhp != NULL; dhp = dt_list_prev(dhp)) { + if ((idp = dt_idhash_lookup(dhp, name)) != NULL) + return (idp); + } + + return (NULL); +} + +void +dt_idstack_push(dt_idstack_t *sp, dt_idhash_t *dhp) +{ + dt_list_append(&sp->dids_list, dhp); +} + +void +dt_idstack_pop(dt_idstack_t *sp, dt_idhash_t *dhp) +{ + assert(dt_list_prev(&sp->dids_list) == dhp); + dt_list_delete(&sp->dids_list, dhp); +} + +dt_ident_t * +dt_ident_create(const char *name, ushort_t kind, ushort_t flags, uint_t id, + dtrace_attribute_t attr, uint_t vers, + const dt_idops_t *ops, void *iarg, ulong_t gen) +{ + dt_ident_t *idp; + char *s = NULL; + + if ((name != NULL && (s = strdup(name)) == NULL) || + (idp = malloc(sizeof (dt_ident_t))) == NULL) { + free(s); + return (NULL); + } + + idp->di_name = s; + idp->di_kind = kind; + idp->di_flags = flags; + idp->di_id = id; + idp->di_attr = attr; + idp->di_vers = vers; + idp->di_ops = ops; + idp->di_iarg = iarg; + idp->di_data = NULL; + idp->di_ctfp = NULL; + idp->di_type = CTF_ERR; + idp->di_next = NULL; + idp->di_gen = gen; + idp->di_lineno = yylineno; + + return (idp); +} + +/* + * Destroy an individual identifier. This code must be kept in sync with the + * dt_idhash_destroy() function below, which separates out the call to di_dtor. 
+ */ +void +dt_ident_destroy(dt_ident_t *idp) +{ + idp->di_ops->di_dtor(idp); + free(idp->di_name); + free(idp); +} + +void +dt_ident_morph(dt_ident_t *idp, ushort_t kind, + const dt_idops_t *ops, void *iarg) +{ + idp->di_ops->di_dtor(idp); + idp->di_kind = kind; + idp->di_ops = ops; + idp->di_iarg = iarg; + idp->di_data = NULL; +} + +dtrace_attribute_t +dt_ident_cook(dt_node_t *dnp, dt_ident_t *idp, dt_node_t **pargp) +{ + dtrace_attribute_t attr; + dt_node_t *args, *argp; + int argc = 0; + + attr = dt_node_list_cook(pargp, DT_IDFLG_REF); + args = pargp ? *pargp : NULL; + + for (argp = args; argp != NULL; argp = argp->dn_list) + argc++; + + idp->di_ops->di_cook(dnp, idp, argc, args); + + if (idp->di_flags & DT_IDFLG_USER) + dnp->dn_flags |= DT_NF_USERLAND; + + return (dt_attr_min(attr, idp->di_attr)); +} + +void +dt_ident_type_assign(dt_ident_t *idp, ctf_file_t *fp, ctf_id_t type) +{ + idp->di_ctfp = fp; + idp->di_type = type; +} + +dt_ident_t * +dt_ident_resolve(dt_ident_t *idp) +{ + while (idp->di_flags & DT_IDFLG_INLINE) { + const dt_node_t *dnp = ((dt_idnode_t *)idp->di_iarg)->din_root; + + if (dnp == NULL) + break; /* can't resolve any further yet */ + + switch (dnp->dn_kind) { + case DT_NODE_VAR: + case DT_NODE_SYM: + case DT_NODE_FUNC: + case DT_NODE_AGG: + case DT_NODE_INLINE: + case DT_NODE_PROBE: + idp = dnp->dn_ident; + continue; + } + + if (dt_node_is_dynamic(dnp)) + idp = dnp->dn_ident; + else + break; + } + + return (idp); +} + +size_t +dt_ident_size(dt_ident_t *idp) +{ + idp = dt_ident_resolve(idp); + return (idp->di_ops->di_size(idp)); +} + +int +dt_ident_unref(const dt_ident_t *idp) +{ + return (idp->di_gen == yypcb->pcb_hdl->dt_gen && + (idp->di_flags & (DT_IDFLG_REF|DT_IDFLG_MOD|DT_IDFLG_DECL)) == 0); +} + +const char * +dt_idkind_name(uint_t kind) +{ + switch (kind) { + case DT_IDENT_ARRAY: return ("associative array"); + case DT_IDENT_SCALAR: return ("scalar"); + case DT_IDENT_PTR: return ("pointer"); + case DT_IDENT_FUNC: return ("function"); 
+ case DT_IDENT_AGG: return ("aggregation"); + case DT_IDENT_AGGFUNC: return ("aggregating function"); + case DT_IDENT_ACTFUNC: return ("tracing function"); + case DT_IDENT_XLSOU: return ("translated data"); + case DT_IDENT_XLPTR: return ("pointer to translated data"); + case DT_IDENT_SYMBOL: return ("external symbol reference"); + case DT_IDENT_ENUM: return ("enumerator"); + case DT_IDENT_PRAGAT: return ("#pragma attributes"); + case DT_IDENT_PRAGBN: return ("#pragma binding"); + case DT_IDENT_PROBE: return ("probe definition"); + default: return ("<?>"); + } +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_ident.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_ident.h new file mode 100644 index 0000000..cc80d6e --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_ident.h @@ -0,0 +1,183 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _DT_IDENT_H +#define _DT_IDENT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libctf.h> +#include <dtrace.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include <dt_list.h> + +struct dt_node; +struct dt_ident; +struct dt_idhash; +struct dt_irlist; +struct dt_regset; + +typedef struct dt_idsig { + int dis_varargs; /* argument index of start of varargs (or -1) */ + int dis_optargs; /* argument index of start of optargs (or -1) */ + int dis_argc; /* number of types in this signature */ + struct dt_node *dis_args; /* array of nodes representing formal types */ + uint64_t dis_auxinfo; /* auxiliary signature information, if any */ +} dt_idsig_t; + +typedef struct dt_idnode { + struct dt_node *din_list; /* allocation list for parse tree nodes */ + struct dt_node *din_root; /* root of this identifier's parse tree */ + struct dt_idhash *din_hash; /* identifiers private to this subtree */ + struct dt_ident **din_argv; /* identifiers in din_hash for arguments */ + int din_argc; /* length of din_argv[] array */ +} dt_idnode_t; + +typedef struct dt_idops { + void (*di_cook)(struct dt_node *, struct dt_ident *, + int, struct dt_node *); + void (*di_dtor)(struct dt_ident *); + size_t (*di_size)(struct dt_ident *); +} dt_idops_t; + +typedef struct dt_ident { + char *di_name; /* identifier name */ + ushort_t di_kind; /* identifier kind (see below) */ + ushort_t di_flags; /* identifier flags (see below) */ + uint_t di_id; /* variable or subr id (see <sys/dtrace.h>) */ + dtrace_attribute_t di_attr; /* identifier stability attributes */ + uint_t di_vers; /* identifier version number (dt_version_t) */ + const dt_idops_t *di_ops; /* identifier's class-specific ops vector */ + void *di_iarg; /* initial argument pointer for ops vector */ + void *di_data; /* private data pointer for ops vector */ + ctf_file_t *di_ctfp; /* CTF container for the variable data type */ + ctf_id_t di_type; /* CTF identifier for the variable data type */ + struct dt_ident *di_next; /* 
pointer to next ident in hash chain */ + ulong_t di_gen; /* generation number (pass that created me) */ + int di_lineno; /* line number that defined this identifier */ +} dt_ident_t; + +#define DT_IDENT_ARRAY 0 /* identifier is an array variable */ +#define DT_IDENT_SCALAR 1 /* identifier is a scalar variable */ +#define DT_IDENT_PTR 2 /* identifier is a magic pointer */ +#define DT_IDENT_FUNC 3 /* identifier is a built-in function */ +#define DT_IDENT_AGG 4 /* identifier is an aggregation */ +#define DT_IDENT_AGGFUNC 5 /* identifier is an aggregating function */ +#define DT_IDENT_ACTFUNC 6 /* identifier is an action function */ +#define DT_IDENT_XLSOU 7 /* identifier is a translated struct or union */ +#define DT_IDENT_XLPTR 8 /* identifier is a translated pointer */ +#define DT_IDENT_SYMBOL 9 /* identifier is an external symbol */ +#define DT_IDENT_ENUM 10 /* identifier is an enumerator */ +#define DT_IDENT_PRAGAT 11 /* identifier is #pragma attributes */ +#define DT_IDENT_PRAGBN 12 /* identifier is #pragma binding */ +#define DT_IDENT_PROBE 13 /* identifier is a probe definition */ + +#define DT_IDFLG_TLS 0x0001 /* variable is thread-local storage */ +#define DT_IDFLG_LOCAL 0x0002 /* variable is local storage */ +#define DT_IDFLG_WRITE 0x0004 /* variable is writable (can be modified) */ +#define DT_IDFLG_INLINE 0x0008 /* variable is an inline definition */ +#define DT_IDFLG_REF 0x0010 /* variable is referenced by this program */ +#define DT_IDFLG_MOD 0x0020 /* variable is modified by this program */ +#define DT_IDFLG_DIFR 0x0040 /* variable is referenced by current DIFO */ +#define DT_IDFLG_DIFW 0x0080 /* variable is modified by current DIFO */ +#define DT_IDFLG_CGREG 0x0100 /* variable is inlined by code generator */ +#define DT_IDFLG_USER 0x0200 /* variable is associated with userland */ +#define DT_IDFLG_PRIM 0x0400 /* variable is associated with primary object */ +#define DT_IDFLG_DECL 0x0800 /* variable is associated with explicit decl */ +#define 
DT_IDFLG_ORPHAN 0x1000 /* variable is in a dt_node and not dt_idhash */ + +typedef struct dt_idhash { + dt_list_t dh_list; /* list prev/next pointers for dt_idstack */ + const char *dh_name; /* name of this hash table */ + void (*dh_defer)(struct dt_idhash *, dt_ident_t *); /* defer callback */ + const dt_ident_t *dh_tmpl; /* template for initial ident population */ + uint_t dh_nextid; /* next id to be returned by idhash_nextid() */ + uint_t dh_minid; /* min id to be returned by idhash_nextid() */ + uint_t dh_maxid; /* max id to be returned by idhash_nextid() */ + ulong_t dh_nelems; /* number of identifiers in hash table */ + ulong_t dh_hashsz; /* number of entries in dh_hash[] array */ + dt_ident_t *dh_hash[1]; /* array of hash table bucket pointers */ +} dt_idhash_t; + +typedef struct dt_idstack { + dt_list_t dids_list; /* list meta-data for dt_idhash_t stack */ +} dt_idstack_t; + +extern const dt_idops_t dt_idops_assc; /* associative array or aggregation */ +extern const dt_idops_t dt_idops_func; /* function call built-in */ +extern const dt_idops_t dt_idops_args; /* args[] built-in */ +extern const dt_idops_t dt_idops_regs; /* regs[]/uregs[] built-in */ +extern const dt_idops_t dt_idops_type; /* predefined type name string */ +extern const dt_idops_t dt_idops_thaw; /* prefrozen type identifier */ +extern const dt_idops_t dt_idops_inline; /* inline variable */ +extern const dt_idops_t dt_idops_probe; /* probe definition */ + +extern dt_idhash_t *dt_idhash_create(const char *, const dt_ident_t *, + uint_t, uint_t); +extern void dt_idhash_destroy(dt_idhash_t *); +extern void dt_idhash_update(dt_idhash_t *); +extern dt_ident_t *dt_idhash_lookup(dt_idhash_t *, const char *); +extern int dt_idhash_nextid(dt_idhash_t *, uint_t *); +extern ulong_t dt_idhash_size(const dt_idhash_t *); +extern const char *dt_idhash_name(const dt_idhash_t *); + +extern dt_ident_t *dt_idhash_insert(dt_idhash_t *, const char *, ushort_t, + ushort_t, uint_t, dtrace_attribute_t, uint_t, + 
const dt_idops_t *, void *, ulong_t); + +extern void dt_idhash_xinsert(dt_idhash_t *, dt_ident_t *); +extern void dt_idhash_delete(dt_idhash_t *, dt_ident_t *); + +typedef int dt_idhash_f(dt_idhash_t *, dt_ident_t *, void *); +extern int dt_idhash_iter(dt_idhash_t *, dt_idhash_f *, void *); + +extern dt_ident_t *dt_idstack_lookup(dt_idstack_t *, const char *); +extern void dt_idstack_push(dt_idstack_t *, dt_idhash_t *); +extern void dt_idstack_pop(dt_idstack_t *, dt_idhash_t *); + +extern dt_ident_t *dt_ident_create(const char *, ushort_t, ushort_t, uint_t, + dtrace_attribute_t, uint_t, const dt_idops_t *, void *, ulong_t); +extern void dt_ident_destroy(dt_ident_t *); +extern void dt_ident_morph(dt_ident_t *, ushort_t, const dt_idops_t *, void *); +extern dtrace_attribute_t dt_ident_cook(struct dt_node *, + dt_ident_t *, struct dt_node **); + +extern void dt_ident_type_assign(dt_ident_t *, ctf_file_t *, ctf_id_t); +extern dt_ident_t *dt_ident_resolve(dt_ident_t *); +extern size_t dt_ident_size(dt_ident_t *); +extern int dt_ident_unref(const dt_ident_t *); + +extern const char *dt_idkind_name(uint_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_IDENT_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h new file mode 100644 index 0000000..8cf5fe2 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h @@ -0,0 +1,735 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#ifndef _DT_IMPL_H +#define _DT_IMPL_H + +#include <sys/param.h> +#include <sys/objfs.h> +#if !defined(sun) +#include <sys/bitmap.h> +#include <sys/utsname.h> +#include <sys/ioccom.h> +#include <sys/time.h> +#include <string.h> +#endif +#include <setjmp.h> +#include <libctf.h> +#include <dtrace.h> +#include <gelf.h> +#if defined(sun) +#include <synch.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include <dt_parser.h> +#include <dt_regset.h> +#include <dt_inttab.h> +#include <dt_strtab.h> +#include <dt_ident.h> +#include <dt_list.h> +#include <dt_decl.h> +#include <dt_as.h> +#include <dt_proc.h> +#include <dt_dof.h> +#include <dt_pcb.h> +#include <dt_pq.h> + +struct dt_module; /* see below */ +struct dt_pfdict; /* see <dt_printf.h> */ +struct dt_arg; /* see below */ +struct dt_provider; /* see <dt_provider.h> */ +struct dt_xlator; /* see <dt_xlator.h> */ + +typedef struct dt_intrinsic { + const char *din_name; /* string name of the intrinsic type */ + ctf_encoding_t din_data; /* integer or floating-point CTF encoding */ + uint_t din_kind; /* CTF type kind to instantiate */ +} dt_intrinsic_t; + +typedef struct dt_typedef { + const char *dty_src; /* string name of typedef source type */ + const char *dty_dst; /* string name of typedef destination type */ +} dt_typedef_t; + +typedef struct dt_intdesc { + const char *did_name; /* string name of 
the integer type */ + ctf_file_t *did_ctfp; /* CTF container for this type reference */ + ctf_id_t did_type; /* CTF type reference for this type */ + uintmax_t did_limit; /* maximum positive value held by type */ +} dt_intdesc_t; + +typedef struct dt_modops { + uint_t (*do_syminit)(struct dt_module *); + void (*do_symsort)(struct dt_module *); + GElf_Sym *(*do_symname)(struct dt_module *, + const char *, GElf_Sym *, uint_t *); + GElf_Sym *(*do_symaddr)(struct dt_module *, + GElf_Addr, GElf_Sym *, uint_t *); +} dt_modops_t; + +typedef struct dt_arg { + int da_ndx; /* index of this argument */ + int da_mapping; /* mapping of argument indices to arguments */ + ctf_id_t da_type; /* type of argument */ + ctf_file_t *da_ctfp; /* CTF container for type */ + dt_ident_t *da_xlator; /* translator, if any */ + struct dt_arg *da_next; /* next argument */ +} dt_arg_t; + +typedef struct dt_sym { + uint_t ds_symid; /* id of corresponding symbol */ + uint_t ds_next; /* index of next element in hash chain */ +} dt_sym_t; + +typedef struct dt_module { + dt_list_t dm_list; /* list forward/back pointers */ + char dm_name[DTRACE_MODNAMELEN]; /* string name of module */ + char dm_file[MAXPATHLEN]; /* file path of module (if any) */ + struct dt_module *dm_next; /* pointer to next module in hash chain */ + const dt_modops_t *dm_ops; /* pointer to data model's ops vector */ + Elf *dm_elf; /* libelf handle for module object */ + objfs_info_t dm_info; /* object filesystem private info */ + ctf_sect_t dm_symtab; /* symbol table for module */ + ctf_sect_t dm_strtab; /* string table for module */ + ctf_sect_t dm_ctdata; /* CTF data for module */ + ctf_file_t *dm_ctfp; /* CTF container handle */ + uint_t *dm_symbuckets; /* symbol table hash buckets (chain indices) */ + dt_sym_t *dm_symchains; /* symbol table hash chains buffer */ + void *dm_asmap; /* symbol pointers sorted by value */ + uint_t dm_symfree; /* index of next free hash element */ + uint_t dm_nsymbuckets; /* number of elements in 
bucket array */ + uint_t dm_nsymelems; /* number of elements in hash table */ + uint_t dm_asrsv; /* actual reserved size of dm_asmap */ + uint_t dm_aslen; /* number of entries in dm_asmap */ + uint_t dm_flags; /* module flags (see below) */ + int dm_modid; /* modinfo(1M) module identifier */ + GElf_Addr dm_text_va; /* virtual address of text section */ + GElf_Xword dm_text_size; /* size in bytes of text section */ + GElf_Addr dm_data_va; /* virtual address of data section */ + GElf_Xword dm_data_size; /* size in bytes of data section */ + GElf_Addr dm_bss_va; /* virtual address of BSS */ + GElf_Xword dm_bss_size; /* size in bytes of BSS */ + dt_idhash_t *dm_extern; /* external symbol definitions */ +#if !defined(sun) + caddr_t dm_reloc_offset; /* Symbol relocation offset. */ + uintptr_t *dm_sec_offsets; +#endif + pid_t dm_pid; /* pid for this module */ + uint_t dm_nctflibs; /* number of ctf children libraries */ + ctf_file_t **dm_libctfp; /* process library ctf pointers */ + char **dm_libctfn; /* names of process ctf containers */ +} dt_module_t; + +#define DT_DM_LOADED 0x1 /* module symbol and type data is loaded */ +#define DT_DM_KERNEL 0x2 /* module is associated with a kernel object */ +#define DT_DM_PRIMARY 0x4 /* module is a krtld primary kernel object */ + +typedef struct dt_provmod { + char *dp_name; /* name of provider module */ + struct dt_provmod *dp_next; /* next module */ +} dt_provmod_t; + +typedef struct dt_ahashent { + struct dt_ahashent *dtahe_prev; /* prev on hash chain */ + struct dt_ahashent *dtahe_next; /* next on hash chain */ + struct dt_ahashent *dtahe_prevall; /* prev on list of all */ + struct dt_ahashent *dtahe_nextall; /* next on list of all */ + uint64_t dtahe_hashval; /* hash value */ + size_t dtahe_size; /* size of data */ + dtrace_aggdata_t dtahe_data; /* data */ + void (*dtahe_aggregate)(int64_t *, int64_t *, size_t); /* function */ +} dt_ahashent_t; + +typedef struct dt_ahash { + dt_ahashent_t **dtah_hash; /* hash table */ + 
dt_ahashent_t *dtah_all; /* list of all elements */ + size_t dtah_size; /* size of hash table */ +} dt_ahash_t; + +typedef struct dt_aggregate { + dtrace_bufdesc_t dtat_buf; /* buf aggregation snapshot */ + int dtat_flags; /* aggregate flags */ + processorid_t dtat_ncpus; /* number of CPUs in aggregate */ + processorid_t *dtat_cpus; /* CPUs in aggregate */ + processorid_t dtat_ncpu; /* size of dtat_cpus array */ + processorid_t dtat_maxcpu; /* maximum number of CPUs */ + dt_ahash_t dtat_hash; /* aggregate hash table */ +} dt_aggregate_t; + +typedef struct dt_print_aggdata { + dtrace_hdl_t *dtpa_dtp; /* pointer to libdtrace handle */ + dtrace_aggvarid_t dtpa_id; /* aggregation variable of interest */ + FILE *dtpa_fp; /* file pointer */ + int dtpa_allunprint; /* print only unprinted aggregations */ + int dtpa_agghist; /* print aggregation as histogram */ + int dtpa_agghisthdr; /* aggregation histogram hdr printed */ + int dtpa_aggpack; /* pack quantized aggregations */ +} dt_print_aggdata_t; + +typedef struct dt_dirpath { + dt_list_t dir_list; /* linked-list forward/back pointers */ + char *dir_path; /* directory pathname */ +} dt_dirpath_t; + +typedef struct dt_lib_depend { + dt_list_t dtld_deplist; /* linked-list forward/back pointers */ + char *dtld_library; /* library name */ + char *dtld_libpath; /* library pathname */ + uint_t dtld_finish; /* completion time in tsort for lib */ + uint_t dtld_start; /* starting time in tsort for lib */ + uint_t dtld_loaded; /* boolean: is this library loaded */ + dt_list_t dtld_dependencies; /* linked-list of lib dependencies */ + dt_list_t dtld_dependents; /* linked-list of lib dependents */ +} dt_lib_depend_t; + +typedef uint32_t dt_version_t; /* encoded version (see below) */ + +struct dtrace_hdl { + const dtrace_vector_t *dt_vector; /* library vector, if vectored open */ + void *dt_varg; /* vector argument, if vectored open */ + dtrace_conf_t dt_conf; /* DTrace driver configuration profile */ + char dt_errmsg[BUFSIZ]; /* 
buffer for formatted syntax error msgs */ + const char *dt_errtag; /* tag used with last call to dt_set_errmsg() */ + dt_pcb_t *dt_pcb; /* pointer to current parsing control block */ + ulong_t dt_gen; /* compiler generation number */ + dt_list_t dt_programs; /* linked list of dtrace_prog_t's */ + dt_list_t dt_xlators; /* linked list of dt_xlator_t's */ + struct dt_xlator **dt_xlatormap; /* dt_xlator_t's indexed by dx_id */ + id_t dt_xlatorid; /* next dt_xlator_t id to assign */ + dt_ident_t *dt_externs; /* linked list of external symbol identifiers */ + dt_idhash_t *dt_macros; /* hash table of macro variable identifiers */ + dt_idhash_t *dt_aggs; /* hash table of aggregation identifiers */ + dt_idhash_t *dt_globals; /* hash table of global identifiers */ + dt_idhash_t *dt_tls; /* hash table of thread-local identifiers */ + dt_list_t dt_modlist; /* linked list of dt_module_t's */ + dt_module_t **dt_mods; /* hash table of dt_module_t's */ + uint_t dt_modbuckets; /* number of module hash buckets */ + uint_t dt_nmods; /* number of modules in hash and list */ + dt_provmod_t *dt_provmod; /* linked list of provider modules */ + dt_module_t *dt_exec; /* pointer to executable module */ + dt_module_t *dt_rtld; /* pointer to run-time linker module */ + dt_module_t *dt_cdefs; /* pointer to C dynamic type module */ + dt_module_t *dt_ddefs; /* pointer to D dynamic type module */ + dt_list_t dt_provlist; /* linked list of dt_provider_t's */ + struct dt_provider **dt_provs; /* hash table of dt_provider_t's */ + uint_t dt_provbuckets; /* number of provider hash buckets */ + uint_t dt_nprovs; /* number of providers in hash and list */ + dt_proc_hash_t *dt_procs; /* hash table of grabbed process handles */ + char **dt_proc_env; /* additional environment variables */ + dt_intdesc_t dt_ints[6]; /* cached integer type descriptions */ + ctf_id_t dt_type_func; /* cached CTF identifier for function type */ + ctf_id_t dt_type_fptr; /* cached CTF identifier for function pointer */ + ctf_id_t 
dt_type_str; /* cached CTF identifier for string type */ + ctf_id_t dt_type_dyn; /* cached CTF identifier for <DYN> type */ + ctf_id_t dt_type_stack; /* cached CTF identifier for stack type */ + ctf_id_t dt_type_symaddr; /* cached CTF identifier for _symaddr type */ + ctf_id_t dt_type_usymaddr; /* cached CTF ident. for _usymaddr type */ + size_t dt_maxprobe; /* max enabled probe ID */ + dtrace_eprobedesc_t **dt_edesc; /* enabled probe descriptions */ + dtrace_probedesc_t **dt_pdesc; /* probe descriptions for enabled prbs */ + size_t dt_maxagg; /* max aggregation ID */ + dtrace_aggdesc_t **dt_aggdesc; /* aggregation descriptions */ + int dt_maxformat; /* max format ID */ + void **dt_formats; /* pointer to format array */ + int dt_maxstrdata; /* max strdata ID */ + char **dt_strdata; /* pointer to strdata array */ + dt_aggregate_t dt_aggregate; /* aggregate */ + dt_pq_t *dt_bufq; /* CPU-specific data queue */ + struct dt_pfdict *dt_pfdict; /* dictionary of printf conversions */ + dt_version_t dt_vmax; /* optional ceiling on program API binding */ + dtrace_attribute_t dt_amin; /* optional floor on program attributes */ + char *dt_cpp_path; /* pathname of cpp(1) to invoke if needed */ + char **dt_cpp_argv; /* argument vector for exec'ing cpp(1) */ + int dt_cpp_argc; /* count of initialized cpp(1) arguments */ + int dt_cpp_args; /* size of dt_cpp_argv[] array */ + char *dt_ld_path; /* pathname of ld(1) to invoke if needed */ + dt_list_t dt_lib_path; /* linked-list forming library search path */ + uint_t dt_lazyload; /* boolean: set via -xlazyload */ + uint_t dt_droptags; /* boolean: set via -xdroptags */ + uint_t dt_active; /* boolean: set once tracing is active */ + uint_t dt_stopped; /* boolean: set once tracing is stopped */ + processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */ + processorid_t dt_endedon; /* CPU that executed END probe (if any) */ + uint_t dt_oflags; /* dtrace open-time options (see dtrace.h) */ + uint_t dt_cflags; /* dtrace 
compile-time options (see dtrace.h) */ + uint_t dt_dflags; /* dtrace link-time options (see dtrace.h) */ + uint_t dt_prcmode; /* dtrace process create mode (see dt_proc.h) */ + uint_t dt_linkmode; /* dtrace symbol linking mode (see below) */ + uint_t dt_linktype; /* dtrace link output file type (see below) */ + uint_t dt_xlatemode; /* dtrace translator linking mode (see below) */ + uint_t dt_stdcmode; /* dtrace stdc compatibility mode (see below) */ + uint_t dt_encoding; /* dtrace output encoding (see below) */ + uint_t dt_treedump; /* dtrace tree debug bitmap (see below) */ + uint64_t dt_options[DTRACEOPT_MAX]; /* dtrace run-time options */ + int dt_version; /* library version requested by client */ + int dt_ctferr; /* error resulting from last CTF failure */ + int dt_errno; /* error resulting from last failed operation */ +#if !defined(sun) + const char *dt_errfile; + int dt_errline; +#endif + int dt_fd; /* file descriptor for dtrace pseudo-device */ + int dt_ftfd; /* file descriptor for fasttrap pseudo-device */ + int dt_fterr; /* saved errno from failed open of dt_ftfd */ + int dt_cdefs_fd; /* file descriptor for C CTF debugging cache */ + int dt_ddefs_fd; /* file descriptor for D CTF debugging cache */ +#if defined(sun) + int dt_stdout_fd; /* file descriptor for saved stdout */ +#else + FILE *dt_freopen_fp; /* file pointer for freopened stdout */ +#endif + dtrace_handle_err_f *dt_errhdlr; /* error handler, if any */ + void *dt_errarg; /* error handler argument */ + dtrace_prog_t *dt_errprog; /* error handler program, if any */ + dtrace_handle_drop_f *dt_drophdlr; /* drop handler, if any */ + void *dt_droparg; /* drop handler argument */ + dtrace_handle_proc_f *dt_prochdlr; /* proc handler, if any */ + void *dt_procarg; /* proc handler argument */ + dtrace_handle_setopt_f *dt_setopthdlr; /* setopt handler, if any */ + void *dt_setoptarg; /* setopt handler argument */ + dtrace_status_t dt_status[2]; /* status cache */ + int dt_statusgen; /* current status 
generation */ + hrtime_t dt_laststatus; /* last status */ + hrtime_t dt_lastswitch; /* last switch of buffer data */ + hrtime_t dt_lastagg; /* last snapshot of aggregation data */ + char *dt_sprintf_buf; /* buffer for dtrace_sprintf() */ + int dt_sprintf_buflen; /* length of dtrace_sprintf() buffer */ + const char *dt_filetag; /* default filetag for dt_set_errmsg() */ + char *dt_buffered_buf; /* buffer for buffered output */ + size_t dt_buffered_offs; /* current offset into buffered buffer */ + size_t dt_buffered_size; /* size of buffered buffer */ + dtrace_handle_buffered_f *dt_bufhdlr; /* buffered handler, if any */ + void *dt_bufarg; /* buffered handler argument */ + dt_dof_t dt_dof; /* DOF generation buffers (see dt_dof.c) */ + struct utsname dt_uts; /* uname(2) information for system */ + dt_list_t dt_lib_dep; /* scratch linked-list of lib dependencies */ + dt_list_t dt_lib_dep_sorted; /* dependency sorted library list */ + dtrace_flowkind_t dt_flow; /* flow kind */ + const char *dt_prefix; /* recommended flow prefix */ + int dt_indent; /* recommended flow indent */ + dtrace_epid_t dt_last_epid; /* most recently consumed EPID */ + uint64_t dt_last_timestamp; /* most recently consumed timestamp */ +}; + +/* + * Values for the user arg of the ECB. + */ +#define DT_ECB_DEFAULT 0 +#define DT_ECB_ERROR 1 + +/* + * Values for the dt_linkmode property, which is used by the assembler when + * processing external symbol references. User can set using -xlink=<mode>. + */ +#define DT_LINK_KERNEL 0 /* kernel syms static, user syms dynamic */ +#define DT_LINK_PRIMARY 1 /* primary kernel syms static, others dynamic */ +#define DT_LINK_DYNAMIC 2 /* all symbols dynamic */ +#define DT_LINK_STATIC 3 /* all symbols static */ + +/* + * Values for the dt_linktype property, which is used by dtrace_program_link() + * to determine the type of output file that is desired by the client. 
+ */ +#define DT_LTYP_ELF 0 /* produce ELF containing DOF */ +#define DT_LTYP_DOF 1 /* produce stand-alone DOF */ + +/* + * Values for the dt_xlatemode property, which is used to determine whether + * references to dynamic translators are permitted. Set using -xlate=<mode>. + */ +#define DT_XL_STATIC 0 /* require xlators to be statically defined */ +#define DT_XL_DYNAMIC 1 /* produce references to dynamic translators */ + +/* + * Values for the dt_stdcmode property, which is used by the compiler when + * running cpp to determine the presence and setting of the __STDC__ macro. + */ +#define DT_STDC_XA 0 /* ISO C + K&R C compat w/o ISO: __STDC__=0 */ +#define DT_STDC_XC 1 /* Strict ISO C: __STDC__=1 */ +#define DT_STDC_XS 2 /* K&R C: __STDC__ not defined */ +#define DT_STDC_XT 3 /* ISO C + K&R C compat with ISO: __STDC__=0 */ + +/* + * Values for the dt_encoding property, which is used to force a particular + * character encoding (overriding default behavior and/or automatic detection). + */ +#define DT_ENCODING_UNSET 0 +#define DT_ENCODING_ASCII 1 +#define DT_ENCODING_UTF8 2 + +/* + * Macro to test whether a given pass bit is set in the dt_treedump bit-vector. + * If the bit for pass 'p' is set, the D compiler displays the parse tree for + * the program by printing it to stderr at the end of compiler pass 'p'. + */ +#define DT_TREEDUMP_PASS(dtp, p) ((dtp)->dt_treedump & (1 << ((p) - 1))) + +/* + * Macros for accessing the cached CTF container and type ID for the common + * types "int", "string", and <DYN>, which we need to use frequently in the D + * compiler. The DT_INT_* macro relies upon "int" being at index 0 in the + * _dtrace_ints_* tables in dt_open.c; the others are also set up there. 
+ */ +#define DT_INT_CTFP(dtp) ((dtp)->dt_ints[0].did_ctfp) +#define DT_INT_TYPE(dtp) ((dtp)->dt_ints[0].did_type) + +#define DT_FUNC_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_FUNC_TYPE(dtp) ((dtp)->dt_type_func) + +#define DT_FPTR_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_FPTR_TYPE(dtp) ((dtp)->dt_type_fptr) + +#define DT_STR_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_STR_TYPE(dtp) ((dtp)->dt_type_str) + +#define DT_DYN_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_DYN_TYPE(dtp) ((dtp)->dt_type_dyn) + +#define DT_STACK_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_STACK_TYPE(dtp) ((dtp)->dt_type_stack) + +#define DT_SYMADDR_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_SYMADDR_TYPE(dtp) ((dtp)->dt_type_symaddr) + +#define DT_USYMADDR_CTFP(dtp) ((dtp)->dt_ddefs->dm_ctfp) +#define DT_USYMADDR_TYPE(dtp) ((dtp)->dt_type_usymaddr) + +/* + * Actions and subroutines are both DT_NODE_FUNC nodes; to avoid confusing + * an action for a subroutine (or vice versa), we assure that the DT_ACT_* + * constants and the DIF_SUBR_* constants occupy non-overlapping ranges by + * starting the DT_ACT_* constants at DIF_SUBR_MAX + 1. 
+ */ +#define DT_ACT_BASE (DIF_SUBR_MAX + 1) /* parenthesized so it expands safely in any expression */ +#define DT_ACT(n) (DT_ACT_BASE + (n)) /* n-th action code above the subroutine range */ + +#define DT_ACT_PRINTF DT_ACT(0) /* printf() action */ +#define DT_ACT_TRACE DT_ACT(1) /* trace() action */ +#define DT_ACT_TRACEMEM DT_ACT(2) /* tracemem() action */ +#define DT_ACT_STACK DT_ACT(3) /* stack() action */ +#define DT_ACT_STOP DT_ACT(4) /* stop() action */ +#define DT_ACT_BREAKPOINT DT_ACT(5) /* breakpoint() action */ +#define DT_ACT_PANIC DT_ACT(6) /* panic() action */ +#define DT_ACT_SPECULATE DT_ACT(7) /* speculate() action */ +#define DT_ACT_COMMIT DT_ACT(8) /* commit() action */ +#define DT_ACT_DISCARD DT_ACT(9) /* discard() action */ +#define DT_ACT_CHILL DT_ACT(10) /* chill() action */ +#define DT_ACT_EXIT DT_ACT(11) /* exit() action */ +#define DT_ACT_USTACK DT_ACT(12) /* ustack() action */ +#define DT_ACT_PRINTA DT_ACT(13) /* printa() action */ +#define DT_ACT_RAISE DT_ACT(14) /* raise() action */ +#define DT_ACT_CLEAR DT_ACT(15) /* clear() action */ +#define DT_ACT_NORMALIZE DT_ACT(16) /* normalize() action */ +#define DT_ACT_DENORMALIZE DT_ACT(17) /* denormalize() action */ +#define DT_ACT_TRUNC DT_ACT(18) /* trunc() action */ +#define DT_ACT_SYSTEM DT_ACT(19) /* system() action */ +#define DT_ACT_JSTACK DT_ACT(20) /* jstack() action */ +#define DT_ACT_FTRUNCATE DT_ACT(21) /* ftruncate() action */ +#define DT_ACT_FREOPEN DT_ACT(22) /* freopen() action */ +#define DT_ACT_SYM DT_ACT(23) /* sym()/func() actions */ +#define DT_ACT_MOD DT_ACT(24) /* mod() action */ +#define DT_ACT_USYM DT_ACT(25) /* usym()/ufunc() actions */ +#define DT_ACT_UMOD DT_ACT(26) /* umod() action */ +#define DT_ACT_UADDR DT_ACT(27) /* uaddr() action */ +#define DT_ACT_SETOPT DT_ACT(28) /* setopt() action */ +#define DT_ACT_PRINT DT_ACT(29) /* print() action */ +#define DT_ACT_PRINTM DT_ACT(30) /* printm() action */ +#define DT_ACT_PRINTT DT_ACT(31) /* printt() action */ + +/* + * Sentinel to tell freopen() to restore the saved stdout.
This must not + * be ever valid for opening for write access via freopen(3C), which of + * course, "." never is. + */ +#define DT_FREOPEN_RESTORE "." + +#define EDT_BASE 1000 /* base value for libdtrace errnos */ + +/* + * libdtrace-private error numbers, numbered consecutively starting at EDT_BASE. + */ +enum { + EDT_VERSION = EDT_BASE, /* client is requesting unsupported version */ + EDT_VERSINVAL, /* version string is invalid or overflows */ + EDT_VERSUNDEF, /* requested API version is not defined */ + EDT_VERSREDUCED, /* requested API version has been reduced */ + EDT_CTF, /* libctf call failed (dt_ctferr has more) */ + EDT_COMPILER, /* error in D program compilation */ + EDT_NOTUPREG, /* tuple register allocation failure */ + EDT_NOMEM, /* memory allocation failure */ + EDT_INT2BIG, /* integer limit exceeded */ + EDT_STR2BIG, /* string limit exceeded */ + EDT_NOMOD, /* unknown module name */ + EDT_NOPROV, /* unknown provider name */ + EDT_NOPROBE, /* unknown probe name */ + EDT_NOSYM, /* unknown symbol name */ + EDT_NOSYMADDR, /* no symbol corresponds to address */ + EDT_NOTYPE, /* unknown type name */ + EDT_NOVAR, /* unknown variable name */ + EDT_NOAGG, /* unknown aggregation name */ + EDT_BADSCOPE, /* improper use of type name scoping operator */ + EDT_BADSPEC, /* overspecified probe description */ + EDT_BADSPCV, /* bad macro variable in probe description */ + EDT_BADID, /* invalid probe identifier */ + EDT_NOTLOADED, /* module is not currently loaded */ + EDT_NOCTF, /* module does not contain any CTF data */ + EDT_DATAMODEL, /* module and program data models don't match */ + EDT_DIFVERS, /* library has newer DIF version than driver */ + EDT_BADAGG, /* unrecognized aggregating action */ + EDT_FIO, /* file i/o error */ + EDT_DIFINVAL, /* invalid DIF program */ + EDT_DIFSIZE, /* invalid DIF size */ + EDT_DIFFAULT, /* failed to copyin DIF program */ + EDT_BADPROBE, /* bad probe description */ + EDT_BADPGLOB, /* bad probe description globbing pattern */ + EDT_NOSCOPE, /* declaration scope stack underflow */ + EDT_NODECL, /* declaration stack
underflow */ + EDT_DMISMATCH, /* record list does not match statement */ + EDT_DOFFSET, /* record data offset error */ + EDT_DALIGN, /* record data alignment error */ + EDT_BADOPTNAME, /* invalid dtrace_setopt option name */ + EDT_BADOPTVAL, /* invalid dtrace_setopt option value */ + EDT_BADOPTCTX, /* invalid dtrace_setopt option context */ + EDT_CPPFORK, /* failed to fork preprocessor */ + EDT_CPPEXEC, /* failed to exec preprocessor */ + EDT_CPPENT, /* preprocessor not found */ + EDT_CPPERR, /* unknown preprocessor error */ + EDT_SYMOFLOW, /* external symbol table overflow */ + EDT_ACTIVE, /* operation illegal when tracing is active */ + EDT_DESTRUCTIVE, /* destructive actions not allowed */ + EDT_NOANON, /* no anonymous tracing state */ + EDT_ISANON, /* can't claim anon state and enable probes */ + EDT_ENDTOOBIG, /* END enablings exceed size of prncpl buffer */ + EDT_NOCONV, /* failed to load type for printf conversion */ + EDT_BADCONV, /* incomplete printf conversion */ + EDT_BADERROR, /* invalid library ERROR action */ + EDT_ERRABORT, /* abort due to error */ + EDT_DROPABORT, /* abort due to drop */ + EDT_DIRABORT, /* abort explicitly directed */ + EDT_BADRVAL, /* invalid return value from callback */ + EDT_BADNORMAL, /* invalid normalization */ + EDT_BUFTOOSMALL, /* enabling exceeds size of buffer */ + EDT_BADTRUNC, /* invalid truncation */ + EDT_BUSY, /* device busy (active kernel debugger) */ + EDT_ACCESS, /* insufficient privileges to use DTrace */ + EDT_NOENT, /* dtrace device not available */ + EDT_BRICKED, /* abort due to systemic unresponsiveness */ + EDT_HARDWIRE, /* failed to load hard-wired definitions */ + EDT_ELFVERSION, /* libelf is out-of-date w.r.t libdtrace */ + EDT_NOBUFFERED, /* attempt to buffer output without handler */ + EDT_UNSTABLE, /* description matched unstable set of probes */ + EDT_BADSETOPT, /* invalid setopt library action */ + EDT_BADSTACKPC, /* invalid stack program counter size */ + EDT_BADAGGVAR, /* invalid aggregation
variable identifier */ + EDT_OVERSION, /* client is requesting deprecated version */ + EDT_ENABLING_ERR, /* failed to enable probe */ + EDT_NOPROBES, /* no probe sites for declared provider */ + EDT_CANTLOAD /* failed to load a module */ +}; + +/* + * Interfaces for parsing and comparing DTrace attribute tuples, which describe + * stability and architectural binding information. The dtrace_attribute_t + * structure and associated constant definitions are found in <sys/dtrace.h>. + */ +extern dtrace_attribute_t dt_attr_min(dtrace_attribute_t, dtrace_attribute_t); +extern dtrace_attribute_t dt_attr_max(dtrace_attribute_t, dtrace_attribute_t); +extern char *dt_attr_str(dtrace_attribute_t, char *, size_t); +extern int dt_attr_cmp(dtrace_attribute_t, dtrace_attribute_t); + +/* + * Interfaces for parsing and handling DTrace version strings. Version binding + * is a feature of the D compiler that is handled completely independently of + * the DTrace kernel infrastructure, so the definitions are here in libdtrace. + * Version strings are compiled into an encoded uint32_t which can be compared + * using C comparison operators. Version definitions are found in dt_open.c. + */ +#define DT_VERSION_STRMAX 16 /* enough for "255.4095.4095\0" */ +#define DT_VERSION_MAJMAX 0xFF /* maximum major version number */ +#define DT_VERSION_MINMAX 0xFFF /* maximum minor version number */ +#define DT_VERSION_MICMAX 0xFFF /* maximum micro version number */ + +#define DT_VERSION_NUMBER(M, m, u) \ + ((((M) & 0xFF) << 24) | (((m) & 0xFFF) << 12) | ((u) & 0xFFF)) + +#define DT_VERSION_MAJOR(v) (((v) & 0xFF000000) >> 24) +#define DT_VERSION_MINOR(v) (((v) & 0x00FFF000) >> 12) +#define DT_VERSION_MICRO(v) ((v) & 0x00000FFF) + +extern char *dt_version_num2str(dt_version_t, char *, size_t); +extern int dt_version_str2num(const char *, dt_version_t *); +extern int dt_version_defined(dt_version_t); + +/* + * Miscellaneous internal libdtrace interfaces.
The definitions below are for + * libdtrace routines that do not yet merit their own separate header file. + */ +extern char *dt_cpp_add_arg(dtrace_hdl_t *, const char *); +extern char *dt_cpp_pop_arg(dtrace_hdl_t *); + +#if defined(sun) +extern int dt_set_errno(dtrace_hdl_t *, int); +#else +int _dt_set_errno(dtrace_hdl_t *, int, const char *, int); +void dt_get_errloc(dtrace_hdl_t *, const char **, int *); +#define dt_set_errno(_a,_b) _dt_set_errno(_a,_b,__FILE__,__LINE__) +#endif +extern void dt_set_errmsg(dtrace_hdl_t *, const char *, const char *, + const char *, int, const char *, va_list); + +#if defined(sun) +extern int dt_ioctl(dtrace_hdl_t *, int, void *); +#else +extern int dt_ioctl(dtrace_hdl_t *, u_long, void *); +#endif +extern int dt_status(dtrace_hdl_t *, processorid_t); +extern long dt_sysconf(dtrace_hdl_t *, int); +extern ssize_t dt_write(dtrace_hdl_t *, int, const void *, size_t); +extern int dt_printf(dtrace_hdl_t *, FILE *, const char *, ...); + +extern void *dt_zalloc(dtrace_hdl_t *, size_t); +extern void *dt_alloc(dtrace_hdl_t *, size_t); +extern void dt_free(dtrace_hdl_t *, void *); +extern void dt_difo_free(dtrace_hdl_t *, dtrace_difo_t *); + +extern int dt_gmatch(const char *, const char *); +extern char *dt_basename(char *); + +extern ulong_t dt_popc(ulong_t); +extern ulong_t dt_popcb(const ulong_t *, ulong_t); + +extern int dt_buffered_enable(dtrace_hdl_t *); +extern int dt_buffered_flush(dtrace_hdl_t *, dtrace_probedata_t *, + const dtrace_recdesc_t *, const dtrace_aggdata_t *, uint32_t flags); +extern void dt_buffered_disable(dtrace_hdl_t *); +extern void dt_buffered_destroy(dtrace_hdl_t *); + +extern uint64_t dt_stddev(uint64_t *, uint64_t); + +extern int dt_rw_read_held(pthread_rwlock_t *); +extern int dt_rw_write_held(pthread_rwlock_t *); +extern int dt_mutex_held(pthread_mutex_t *); +extern int dt_options_load(dtrace_hdl_t *); + +#define DT_RW_READ_HELD(x) dt_rw_read_held(x) +#define DT_RW_WRITE_HELD(x) dt_rw_write_held(x) +#define 
DT_RW_LOCK_HELD(x) (DT_RW_READ_HELD(x) || DT_RW_WRITE_HELD(x)) +#define DT_MUTEX_HELD(x) dt_mutex_held(x) + +extern void dt_dprintf(const char *, ...); + +extern void dt_setcontext(dtrace_hdl_t *, dtrace_probedesc_t *); +extern void dt_endcontext(dtrace_hdl_t *); + +extern void dt_pragma(dt_node_t *); +extern int dt_reduce(dtrace_hdl_t *, dt_version_t); +extern void dt_cg(dt_pcb_t *, dt_node_t *); +extern dtrace_difo_t *dt_as(dt_pcb_t *); +extern void dt_dis(const dtrace_difo_t *, FILE *); + +extern int dt_aggregate_go(dtrace_hdl_t *); +extern int dt_aggregate_init(dtrace_hdl_t *); +extern void dt_aggregate_destroy(dtrace_hdl_t *); + +extern int dt_epid_lookup(dtrace_hdl_t *, dtrace_epid_t, + dtrace_eprobedesc_t **, dtrace_probedesc_t **); +extern void dt_epid_destroy(dtrace_hdl_t *); +extern int dt_aggid_lookup(dtrace_hdl_t *, dtrace_aggid_t, dtrace_aggdesc_t **); +extern void dt_aggid_destroy(dtrace_hdl_t *); + +extern void *dt_format_lookup(dtrace_hdl_t *, int); +extern void dt_format_destroy(dtrace_hdl_t *); + +extern const char *dt_strdata_lookup(dtrace_hdl_t *, int); +extern void dt_strdata_destroy(dtrace_hdl_t *); + +extern int dt_print_quantize(dtrace_hdl_t *, FILE *, + const void *, size_t, uint64_t); +extern int dt_print_lquantize(dtrace_hdl_t *, FILE *, + const void *, size_t, uint64_t); +extern int dt_print_llquantize(dtrace_hdl_t *, FILE *, + const void *, size_t, uint64_t); +extern int dt_print_agg(const dtrace_aggdata_t *, void *); + +extern int dt_handle(dtrace_hdl_t *, dtrace_probedata_t *); +extern int dt_handle_liberr(dtrace_hdl_t *, + const dtrace_probedata_t *, const char *); +extern int dt_handle_cpudrop(dtrace_hdl_t *, processorid_t, + dtrace_dropkind_t, uint64_t); +extern int dt_handle_status(dtrace_hdl_t *, + dtrace_status_t *, dtrace_status_t *); +extern int dt_handle_setopt(dtrace_hdl_t *, dtrace_setoptdata_t *); + +extern int dt_lib_depend_add(dtrace_hdl_t *, dt_list_t *, const char *); +extern dt_lib_depend_t 
*dt_lib_depend_lookup(dt_list_t *, const char *); + +extern dt_pcb_t *yypcb; /* pointer to current parser control block */ +extern char yyintprefix; /* int token prefix for macros (+/-) */ +extern char yyintsuffix[4]; /* int token suffix ([uUlL]*) */ +extern int yyintdecimal; /* int token is decimal (1) or octal/hex (0) */ +extern char yytext[]; /* lex input buffer */ +extern int yylineno; /* lex line number */ +extern int yydebug; /* lex debugging */ +extern dt_node_t *yypragma; /* lex token list for control lines */ + +extern const dtrace_attribute_t _dtrace_maxattr; /* maximum attributes */ +extern const dtrace_attribute_t _dtrace_defattr; /* default attributes */ +extern const dtrace_attribute_t _dtrace_symattr; /* symbol ref attributes */ +extern const dtrace_attribute_t _dtrace_typattr; /* type ref attributes */ +extern const dtrace_attribute_t _dtrace_prvattr; /* provider attributes */ +extern const dtrace_pattr_t _dtrace_prvdesc; /* provider attribute bundle */ + +extern const dt_version_t _dtrace_versions[]; /* array of valid versions */ +extern const char *const _dtrace_version; /* current version string */ + +extern int _dtrace_strbuckets; /* number of hash buckets for strings */ +extern int _dtrace_intbuckets; /* number of hash buckets for ints */ +extern uint_t _dtrace_stkindent; /* default indent for stack/ustack */ +extern uint_t _dtrace_pidbuckets; /* number of hash buckets for pids */ +extern uint_t _dtrace_pidlrulim; /* number of proc handles to cache */ +extern int _dtrace_debug; /* debugging messages enabled */ +extern size_t _dtrace_bufsize; /* default dt_buf_create() size */ +extern int _dtrace_argmax; /* default maximum probe arguments */ + +extern const char *_dtrace_libdir; /* default library directory */ +extern const char *_dtrace_moddir; /* default kernel module directory */ + +#ifdef __FreeBSD__ +extern int gmatch(const char *, const char *); +extern int yylex(void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_IMPL_H */ 
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_inttab.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_inttab.c new file mode 100644 index 0000000..a6ac589 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_inttab.c @@ -0,0 +1,115 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> + +#include <dt_inttab.h> +#include <dt_impl.h> + +/* + * Allocate an empty integer table for the handle dtp with _dtrace_intbuckets + * hash buckets. Returns the new table, or NULL if either allocation fails. + * The bucket count must be a nonzero power of two because dt_inttab_insert() + * selects a bucket by masking the value with (int_hashlen - 1). + */ +dt_inttab_t * +dt_inttab_create(dtrace_hdl_t *dtp) +{ + uint_t len = _dtrace_intbuckets; + dt_inttab_t *ip; + + /* zero would pass the bare power-of-two test but yields an all-ones mask */ + assert(len != 0 && (len & (len - 1)) == 0); + + if ((ip = dt_zalloc(dtp, sizeof (dt_inttab_t))) == NULL || + (ip->int_hash = dt_zalloc(dtp, sizeof (void *) * len)) == NULL) { + dt_free(dtp, ip); + return (NULL); + } + + ip->int_hdl = dtp; + ip->int_hashlen = len; + + return (ip); +} + +/* + * Free every element on the index-order list, then the bucket array, and + * finally the table itself. + */ +void +dt_inttab_destroy(dt_inttab_t *ip) +{ + dt_inthash_t *hp, *np; + + for (hp = ip->int_head; hp != NULL; hp = np) { + np = hp->inh_next; /* save the link before hp is freed */ + dt_free(ip->int_hdl, hp); + } + + dt_free(ip->int_hdl, ip->int_hash); + dt_free(ip->int_hdl, ip); +} + +/* + * Add value to the table and return its index, or -1 if the new element cannot + * be allocated. If DT_INT_SHARED is set in flags, an existing element with + * the same value and identical flags is reused and its index is returned. + * A new element is pushed onto the front of its hash chain and appended to + * the tail of the index-order list. + */ +int +dt_inttab_insert(dt_inttab_t *ip, uint64_t value, uint_t flags) +{ + uint_t h = value & (ip->int_hashlen - 1); + dt_inthash_t *hp; + + if (flags & DT_INT_SHARED) { + for (hp = ip->int_hash[h]; hp != NULL; hp = hp->inh_hash) { + if (hp->inh_value == value && hp->inh_flags == flags) + return (hp->inh_index); + } + } + + if ((hp = dt_alloc(ip->int_hdl, sizeof (dt_inthash_t))) == NULL) + return (-1); + + hp->inh_hash = ip->int_hash[h]; + hp->inh_next = NULL; + hp->inh_value = value; + hp->inh_index = ip->int_index++; + hp->inh_flags = flags; + + ip->int_hash[h] = hp; + ip->int_nelems++; + + if (ip->int_head == NULL) + ip->int_head = hp; + else + ip->int_tail->inh_next = hp; + + ip->int_tail = hp; + return (hp->inh_index); +} + +/* + * Return the number of elements currently stored in the table. + */ +uint_t +dt_inttab_size(const dt_inttab_t *ip) +{ + return (ip->int_nelems); +} + +/* + * Copy the value of every element to dst in index order. One uint64_t is + * written per element, so dst must have room for dt_inttab_size() entries. + */ +void +dt_inttab_write(const dt_inttab_t *ip, uint64_t *dst) +{ + const dt_inthash_t *hp; + + for (hp = ip->int_head; hp != NULL; hp = hp->inh_next) + *dst++ = hp->inh_value; +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_inttab.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_inttab.h new file mode 100644 index 0000000..c1e86e3 --- /dev/null +++
b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_inttab.h @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _DT_INTTAB_H +#define _DT_INTTAB_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <dtrace.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct dt_inthash { + struct dt_inthash *inh_hash; /* next dt_inthash in hash chain */ + struct dt_inthash *inh_next; /* next dt_inthash in output table */ + uint64_t inh_value; /* value associated with this element */ + uint_t inh_index; /* index associated with this element */ + uint_t inh_flags; /* flags (see below) */ +} dt_inthash_t; + +typedef struct dt_inttab { + dtrace_hdl_t *int_hdl; /* pointer back to library handle */ + dt_inthash_t **int_hash; /* array of hash buckets */ + uint_t int_hashlen; /* size of hash bucket array */ + uint_t int_nelems; /* number of elements hashed */ + dt_inthash_t *int_head; /* head of table in index order */ + dt_inthash_t *int_tail; /* tail of table in index order */ + uint_t int_index; /* next index to hand out */ +} dt_inttab_t; + +#define DT_INT_PRIVATE 0 /* only a single ref for this entry */ +#define DT_INT_SHARED 1 /* multiple refs can share entry */ + +extern dt_inttab_t *dt_inttab_create(dtrace_hdl_t *); +extern void dt_inttab_destroy(dt_inttab_t *); +extern int dt_inttab_insert(dt_inttab_t *, uint64_t, uint_t); +extern uint_t dt_inttab_size(const dt_inttab_t *); +extern void dt_inttab_write(const dt_inttab_t *, uint64_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_INTTAB_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_lex.l b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_lex.l new file mode 100644 index 0000000..032d303 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_lex.l @@ -0,0 +1,883 @@ +%{ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <ctype.h> +#include <errno.h> + +#include <dt_impl.h> +#include <dt_grammar.h> +#include <dt_parser.h> +#include <dt_string.h> + +/* + * We need to undefine lex's input and unput macros so that references to these + * call the functions provided at the end of this source file. + */ +#if defined(sun) +#undef input +#undef unput +#else +/* + * Define YY_INPUT for flex since input() can't be re-defined. When + * pcb_fileptr is non-NULL, input is read from that file with fread(), and a + * read error longjmp()s to pcb_jmpbuf with EDT_FIO; otherwise bytes are + * copied from pcb_strptr until pcb_string + pcb_strlen is reached. + */ +#define YY_INPUT(buf,result,max_size) \ + if (yypcb->pcb_fileptr != NULL) { \ + if (((result = fread(buf, 1, max_size, yypcb->pcb_fileptr)) == 0) \ + && ferror(yypcb->pcb_fileptr)) \ + longjmp(yypcb->pcb_jmpbuf, EDT_FIO); \ + } else { \ + int n; \ + for (n = 0; n < max_size && \ + yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen; n++) \ + buf[n] = *yypcb->pcb_strptr++; \ + result = n; \ + } +/* + * Do not let EOF tokens be put back. This does not work with flex.
+ * On the other hand, leaving current buffer in same state it was when + * last EOF was received guarantees that input() will keep returning EOF + * for all subsequent invocations, which is the effect desired. + */ +#undef unput +#define unput(c) \ + do { \ + int _c = c; \ + if (_c != EOF) \ + yyunput(_c, yytext_ptr); \ + } while(0) +#endif + +static int id_or_type(const char *); +#if defined(sun) +static int input(void); +static void unput(int); +#endif + +/* + * We first define a set of labeled states for use in the D lexer and then a + * set of regular expressions to simplify things below. The lexer states are: + * + * S0 - D program clause and expression lexing + * S1 - D comments (i.e. skip everything until end of comment) + * S2 - D program outer scope (probe specifiers and declarations) + * S3 - D control line parsing (i.e. after ^# is seen but before \n) + * S4 - D control line scan (locate control directives only and invoke S3) + */ +%} + +%e 1500 /* maximum nodes */ +%p 4900 /* maximum positions */ +%n 600 /* maximum states */ +%a 3000 /* maximum transitions */ + +%s S0 S1 S2 S3 S4 + +RGX_AGG "@"[a-zA-Z_][0-9a-zA-Z_]* +RGX_PSPEC [-$:a-zA-Z_.?*\\\[\]!][-$:0-9a-zA-Z_.`?*\\\[\]!]* +RGX_ALTIDENT [a-zA-Z_][0-9a-zA-Z_]* +RGX_LMID LM[0-9a-fA-F]+` +RGX_MOD_IDENT [a-zA-Z_`][0-9a-z.A-Z_`]*` +RGX_IDENT [a-zA-Z_`][0-9a-zA-Z_`]* +RGX_INT ([0-9]+|0[xX][0-9A-Fa-f]+)[uU]?[lL]?[lL]? +RGX_FP ([0-9]+("."?)[0-9]*|"."[0-9]+)((e|E)("+"|-)?[0-9]+)?[fFlL]? +RGX_WS [\f\n\r\t\v ] +RGX_STR ([^"\\\n]|\\[^"\n]|\\\")* +RGX_CHR ([^'\\\n]|\\[^'\n]|\\')* +RGX_INTERP ^[\f\t\v ]*#!.* +RGX_CTL ^[\f\t\v ]*# + +%% + +%{ + +/* + * We insert a special prologue into yylex() itself: if the pcb contains a + * context token, we return that prior to running the normal lexer. This + * allows libdtrace to force yacc into one of our three parsing contexts: D + * expression (DT_CTX_DEXPR), D program (DT_CTX_DPROG) or D type (DT_CTX_DTYPE). 
+ * Once the token is returned, we clear it so this only happens once. + */ +if (yypcb->pcb_token != 0) { + int tok = yypcb->pcb_token; + yypcb->pcb_token = 0; + return (tok); +} + +%} + +<S0>auto return (DT_KEY_AUTO); +<S0>break return (DT_KEY_BREAK); +<S0>case return (DT_KEY_CASE); +<S0>char return (DT_KEY_CHAR); +<S0>const return (DT_KEY_CONST); +<S0>continue return (DT_KEY_CONTINUE); +<S0>counter return (DT_KEY_COUNTER); +<S0>default return (DT_KEY_DEFAULT); +<S0>do return (DT_KEY_DO); +<S0>double return (DT_KEY_DOUBLE); +<S0>else return (DT_KEY_ELSE); +<S0>enum return (DT_KEY_ENUM); +<S0>extern return (DT_KEY_EXTERN); +<S0>float return (DT_KEY_FLOAT); +<S0>for return (DT_KEY_FOR); +<S0>goto return (DT_KEY_GOTO); +<S0>if return (DT_KEY_IF); +<S0>import return (DT_KEY_IMPORT); +<S0>inline return (DT_KEY_INLINE); +<S0>int return (DT_KEY_INT); +<S0>long return (DT_KEY_LONG); +<S0>offsetof return (DT_TOK_OFFSETOF); +<S0>probe return (DT_KEY_PROBE); +<S0>provider return (DT_KEY_PROVIDER); +<S0>register return (DT_KEY_REGISTER); +<S0>restrict return (DT_KEY_RESTRICT); +<S0>return return (DT_KEY_RETURN); +<S0>self return (DT_KEY_SELF); +<S0>short return (DT_KEY_SHORT); +<S0>signed return (DT_KEY_SIGNED); +<S0>sizeof return (DT_TOK_SIZEOF); +<S0>static return (DT_KEY_STATIC); +<S0>string return (DT_KEY_STRING); +<S0>stringof return (DT_TOK_STRINGOF); +<S0>struct return (DT_KEY_STRUCT); +<S0>switch return (DT_KEY_SWITCH); +<S0>this return (DT_KEY_THIS); +<S0>translator return (DT_KEY_XLATOR); +<S0>typedef return (DT_KEY_TYPEDEF); +<S0>union return (DT_KEY_UNION); +<S0>unsigned return (DT_KEY_UNSIGNED); +<S0>userland return (DT_KEY_USERLAND); +<S0>void return (DT_KEY_VOID); +<S0>volatile return (DT_KEY_VOLATILE); +<S0>while return (DT_KEY_WHILE); +<S0>xlate return (DT_TOK_XLATE); + +<S2>auto { yybegin(YYS_EXPR); return (DT_KEY_AUTO); } +<S2>char { yybegin(YYS_EXPR); return (DT_KEY_CHAR); } +<S2>const { yybegin(YYS_EXPR); return (DT_KEY_CONST); } +<S2>counter { 
yybegin(YYS_DEFINE); return (DT_KEY_COUNTER); } +<S2>double { yybegin(YYS_EXPR); return (DT_KEY_DOUBLE); } +<S2>enum { yybegin(YYS_EXPR); return (DT_KEY_ENUM); } +<S2>extern { yybegin(YYS_EXPR); return (DT_KEY_EXTERN); } +<S2>float { yybegin(YYS_EXPR); return (DT_KEY_FLOAT); } +<S2>import { yybegin(YYS_EXPR); return (DT_KEY_IMPORT); } +<S2>inline { yybegin(YYS_DEFINE); return (DT_KEY_INLINE); } +<S2>int { yybegin(YYS_EXPR); return (DT_KEY_INT); } +<S2>long { yybegin(YYS_EXPR); return (DT_KEY_LONG); } +<S2>provider { yybegin(YYS_DEFINE); return (DT_KEY_PROVIDER); } +<S2>register { yybegin(YYS_EXPR); return (DT_KEY_REGISTER); } +<S2>restrict { yybegin(YYS_EXPR); return (DT_KEY_RESTRICT); } +<S2>self { yybegin(YYS_EXPR); return (DT_KEY_SELF); } +<S2>short { yybegin(YYS_EXPR); return (DT_KEY_SHORT); } +<S2>signed { yybegin(YYS_EXPR); return (DT_KEY_SIGNED); } +<S2>static { yybegin(YYS_EXPR); return (DT_KEY_STATIC); } +<S2>string { yybegin(YYS_EXPR); return (DT_KEY_STRING); } +<S2>struct { yybegin(YYS_EXPR); return (DT_KEY_STRUCT); } +<S2>this { yybegin(YYS_EXPR); return (DT_KEY_THIS); } +<S2>translator { yybegin(YYS_DEFINE); return (DT_KEY_XLATOR); } +<S2>typedef { yybegin(YYS_EXPR); return (DT_KEY_TYPEDEF); } +<S2>union { yybegin(YYS_EXPR); return (DT_KEY_UNION); } +<S2>unsigned { yybegin(YYS_EXPR); return (DT_KEY_UNSIGNED); } +<S2>void { yybegin(YYS_EXPR); return (DT_KEY_VOID); } +<S2>volatile { yybegin(YYS_EXPR); return (DT_KEY_VOLATILE); } + +<S0>"$$"[0-9]+ { + int i = atoi(yytext + 2); + char *v = ""; + + /* + * A macro argument reference substitutes the text of + * an argument in place of the current token. When we + * see $$<d> we fetch the saved string from pcb_sargv + * (or use the default argument if the option has been + * set and the argument hasn't been specified) and + * return a token corresponding to this string. 
+ */ + if (i < 0 || (i >= yypcb->pcb_sargc && + !(yypcb->pcb_cflags & DTRACE_C_DEFARG))) { + xyerror(D_MACRO_UNDEF, "macro argument %s is " + "not defined\n", yytext); + } + + if (i < yypcb->pcb_sargc) { + v = yypcb->pcb_sargv[i]; /* get val from pcb */ + yypcb->pcb_sflagv[i] |= DT_IDFLG_REF; + } + + if ((yylval.l_str = strdup(v)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + (void) stresc2chr(yylval.l_str); + return (DT_TOK_STRING); + } + +<S0>"$"[0-9]+ { + int i = atoi(yytext + 1); + char *p, *v = "0"; + + /* + * A macro argument reference substitutes the text of + * one identifier or integer pattern for another. When + * we see $<d> we fetch the saved string from pcb_sargv + * (or use the default argument if the option has been + * set and the argument hasn't been specified) and + * return a token corresponding to this string. + */ + if (i < 0 || (i >= yypcb->pcb_sargc && + !(yypcb->pcb_cflags & DTRACE_C_DEFARG))) { + xyerror(D_MACRO_UNDEF, "macro argument %s is " + "not defined\n", yytext); + } + + if (i < yypcb->pcb_sargc) { + v = yypcb->pcb_sargv[i]; /* get val from pcb */ + yypcb->pcb_sflagv[i] |= DT_IDFLG_REF; + } + + /* + * If the macro text is not a valid integer or ident, + * then we treat it as a string. The string may be + * optionally enclosed in quotes, which we strip. + */ + if (strbadidnum(v)) { + size_t len = strlen(v); + + if (len != 1 && *v == '"' && v[len - 1] == '"') + yylval.l_str = strndup(v + 1, len - 2); + else + yylval.l_str = strndup(v, len); + + if (yylval.l_str == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + (void) stresc2chr(yylval.l_str); + return (DT_TOK_STRING); + } + + /* + * If the macro text is not a string and begins with a + * digit or a +/- sign, process it as an integer token.
+ */ + if (isdigit(v[0]) || v[0] == '-' || v[0] == '+') { + if (isdigit(v[0])) + yyintprefix = 0; + else + yyintprefix = *v++; + + errno = 0; + yylval.l_int = strtoull(v, &p, 0); + (void) strncpy(yyintsuffix, p, + sizeof (yyintsuffix)); + /* + * strncpy() writes no terminator when the text at p + * fills the whole buffer. The argument text comes + * from the caller rather than from RGX_INT, so its + * tail may be longer than a valid suffix (NOTE(review): + * depends on what strbadidnum() admits -- confirm). + * Terminate explicitly so yyintsuffix is always a + * valid C string. + */ + yyintsuffix[sizeof (yyintsuffix) - 1] = '\0'; + yyintdecimal = *v != '0'; + + if (errno == ERANGE) { + xyerror(D_MACRO_OFLOW, "macro argument" + " %s constant %s results in integer" + " overflow\n", yytext, v); + } + + return (DT_TOK_INT); + } + + return (id_or_type(v)); + } + +<S0>"$$"{RGX_IDENT} { + dt_ident_t *idp = dt_idhash_lookup( + yypcb->pcb_hdl->dt_macros, yytext + 2); + + char s[16]; /* enough for UINT_MAX + \0 */ + + if (idp == NULL) { + xyerror(D_MACRO_UNDEF, "macro variable %s " + "is not defined\n", yytext); + } + + /* + * For the moment, all current macro variables are of + * type id_t (refer to dtrace_update() for details). + */ + (void) snprintf(s, sizeof (s), "%u", idp->di_id); + if ((yylval.l_str = strdup(s)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + return (DT_TOK_STRING); + } + +<S0>"$"{RGX_IDENT} { + dt_ident_t *idp = dt_idhash_lookup( + yypcb->pcb_hdl->dt_macros, yytext + 1); + + if (idp == NULL) { + xyerror(D_MACRO_UNDEF, "macro variable %s " + "is not defined\n", yytext); + } + + /* + * For the moment, all current macro variables are of + * type id_t (refer to dtrace_update() for details).
+ */ + yylval.l_int = (intmax_t)(int)idp->di_id; + yyintprefix = 0; + yyintsuffix[0] = '\0'; + yyintdecimal = 1; + + return (DT_TOK_INT); + } + +<S0>{RGX_IDENT} | +<S0>{RGX_MOD_IDENT}{RGX_IDENT} | +<S0>{RGX_MOD_IDENT} { + return (id_or_type(yytext)); + } + +<S0>{RGX_AGG} { + if ((yylval.l_str = strdup(yytext)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + return (DT_TOK_AGG); + } + +<S0>"@" { + if ((yylval.l_str = strdup("@_")) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + return (DT_TOK_AGG); + } + +<S0>{RGX_INT} | +<S2>{RGX_INT} | +<S3>{RGX_INT} { + char *p; + + errno = 0; + yylval.l_int = strtoull(yytext, &p, 0); + yyintprefix = 0; + (void) strncpy(yyintsuffix, p, sizeof (yyintsuffix)); + yyintdecimal = yytext[0] != '0'; + + if (errno == ERANGE) { + xyerror(D_INT_OFLOW, "constant %s results in " + "integer overflow\n", yytext); + } + + if (*p != '\0' && strchr("uUlL", *p) == NULL) { + xyerror(D_INT_DIGIT, "constant %s contains " + "invalid digit %c\n", yytext, *p); + } + + if ((YYSTATE) != S3) + return (DT_TOK_INT); + + yypragma = dt_node_link(yypragma, + dt_node_int(yylval.l_int)); + } + +<S0>{RGX_FP} yyerror("floating-point constants are not permitted\n"); + +<S0>\"{RGX_STR}$ | +<S3>\"{RGX_STR}$ xyerror(D_STR_NL, "newline encountered in string literal"); + +<S0>\"{RGX_STR}\" | +<S3>\"{RGX_STR}\" { + /* + * Quoted string -- convert C escape sequences and + * return the string as a token. + */ + yylval.l_str = strndup(yytext + 1, yyleng - 2); + + if (yylval.l_str == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + (void) stresc2chr(yylval.l_str); + if ((YYSTATE) != S3) + return (DT_TOK_STRING); + + yypragma = dt_node_link(yypragma, + dt_node_string(yylval.l_str)); + } + +<S0>'{RGX_CHR}$ xyerror(D_CHR_NL, "newline encountered in character constant"); + +<S0>'{RGX_CHR}' { + char *s, *p, *q; + size_t nbytes; + + /* + * Character constant -- convert C escape sequences and + * return the character as an integer immediate value. 
+ */ + if (yyleng == 2) + xyerror(D_CHR_NULL, "empty character constant"); + + s = yytext + 1; + yytext[yyleng - 1] = '\0'; + nbytes = stresc2chr(s); + yylval.l_int = 0; + yyintprefix = 0; + yyintsuffix[0] = '\0'; + yyintdecimal = 1; + + if (nbytes > sizeof (yylval.l_int)) { + xyerror(D_CHR_OFLOW, "character constant is " + "too long"); + } +#if BYTE_ORDER == _LITTLE_ENDIAN + p = ((char *)&yylval.l_int) + nbytes - 1; + for (q = s; nbytes != 0; nbytes--) + *p-- = *q++; +#else + bcopy(s, ((char *)&yylval.l_int) + + sizeof (yylval.l_int) - nbytes, nbytes); +#endif + return (DT_TOK_INT); + } + +<S0>"/*" | +<S2>"/*" { + yypcb->pcb_cstate = (YYSTATE); + BEGIN(S1); + } + +<S0>{RGX_INTERP} | +<S2>{RGX_INTERP} ; /* discard any #! lines */ + +<S0>{RGX_CTL} | +<S2>{RGX_CTL} | +<S4>{RGX_CTL} { + assert(yypragma == NULL); + yypcb->pcb_cstate = (YYSTATE); + BEGIN(S3); + } + +<S4>. ; /* discard */ +<S4>"\n" ; /* discard */ + +<S0>"/" { + int c, tok; + + /* + * The use of "/" as the predicate delimiter and as the + * integer division symbol requires special lookahead + * to avoid a shift/reduce conflict in the D grammar. + * We look ahead to the next non-whitespace character. + * If we encounter EOF, ";", "{", or "/", then this "/" + * closes the predicate and we return DT_TOK_EPRED. + * If we encounter anything else, it's DT_TOK_DIV. 
+ */ + while ((c = input()) != 0) { + if (strchr("\f\n\r\t\v ", c) == NULL) + break; + } + + if (c == 0 || c == ';' || c == '{' || c == '/') { + if (yypcb->pcb_parens != 0) { + yyerror("closing ) expected in " + "predicate before /\n"); + } + if (yypcb->pcb_brackets != 0) { + yyerror("closing ] expected in " + "predicate before /\n"); + } + tok = DT_TOK_EPRED; + } else + tok = DT_TOK_DIV; + + unput(c); + return (tok); + } + +<S0>"(" { + yypcb->pcb_parens++; + return (DT_TOK_LPAR); + } + +<S0>")" { + if (--yypcb->pcb_parens < 0) + yyerror("extra ) in input stream\n"); + return (DT_TOK_RPAR); + } + +<S0>"[" { + yypcb->pcb_brackets++; + return (DT_TOK_LBRAC); + } + +<S0>"]" { + if (--yypcb->pcb_brackets < 0) + yyerror("extra ] in input stream\n"); + return (DT_TOK_RBRAC); + } + +<S0>"{" | +<S2>"{" { + yypcb->pcb_braces++; + return ('{'); + } + +<S0>"}" { + if (--yypcb->pcb_braces < 0) + yyerror("extra } in input stream\n"); + return ('}'); + } + +<S0>"|" return (DT_TOK_BOR); +<S0>"^" return (DT_TOK_XOR); +<S0>"&" return (DT_TOK_BAND); +<S0>"&&" return (DT_TOK_LAND); +<S0>"^^" return (DT_TOK_LXOR); +<S0>"||" return (DT_TOK_LOR); +<S0>"==" return (DT_TOK_EQU); +<S0>"!=" return (DT_TOK_NEQ); +<S0>"<" return (DT_TOK_LT); +<S0>"<=" return (DT_TOK_LE); +<S0>">" return (DT_TOK_GT); +<S0>">=" return (DT_TOK_GE); +<S0>"<<" return (DT_TOK_LSH); +<S0>">>" return (DT_TOK_RSH); +<S0>"+" return (DT_TOK_ADD); +<S0>"-" return (DT_TOK_SUB); +<S0>"*" return (DT_TOK_MUL); +<S0>"%" return (DT_TOK_MOD); +<S0>"~" return (DT_TOK_BNEG); +<S0>"!" return (DT_TOK_LNEG); +<S0>"?" return (DT_TOK_QUESTION); +<S0>":" return (DT_TOK_COLON); +<S0>"." 
return (DT_TOK_DOT); +<S0>"->" return (DT_TOK_PTR); +<S0>"=" return (DT_TOK_ASGN); +<S0>"+=" return (DT_TOK_ADD_EQ); +<S0>"-=" return (DT_TOK_SUB_EQ); +<S0>"*=" return (DT_TOK_MUL_EQ); +<S0>"/=" return (DT_TOK_DIV_EQ); +<S0>"%=" return (DT_TOK_MOD_EQ); +<S0>"&=" return (DT_TOK_AND_EQ); +<S0>"^=" return (DT_TOK_XOR_EQ); +<S0>"|=" return (DT_TOK_OR_EQ); +<S0>"<<=" return (DT_TOK_LSH_EQ); +<S0>">>=" return (DT_TOK_RSH_EQ); +<S0>"++" return (DT_TOK_ADDADD); +<S0>"--" return (DT_TOK_SUBSUB); +<S0>"..." return (DT_TOK_ELLIPSIS); +<S0>"," return (DT_TOK_COMMA); +<S0>";" return (';'); +<S0>{RGX_WS} ; /* discard */ +<S0>"\\"\n ; /* discard */ +<S0>. yyerror("syntax error near \"%c\"\n", yytext[0]); + +<S1>"/*" yyerror("/* encountered inside a comment\n"); +<S1>"*/" BEGIN(yypcb->pcb_cstate); +<S1>.|\n ; /* discard */ + +<S2>{RGX_PSPEC} { + /* + * S2 has an ambiguity because RGX_PSPEC includes '*' + * as a glob character and '*' also can be DT_TOK_STAR. + * Since lex always matches the longest token, this + * rule can be matched by an input string like "int*", + * which could begin a global variable declaration such + * as "int*x;" or could begin a RGX_PSPEC with globbing + * such as "int* { trace(timestamp); }". If C_PSPEC is + * not set, we must resolve the ambiguity in favor of + * the type and perform lexer pushback if the fragment + * before '*' or entire fragment matches a type name. + * If C_PSPEC is set, we always return a PSPEC token. + * If C_PSPEC is off, the user can avoid ambiguity by + * including a ':' delimiter in the specifier, which + * they should be doing anyway to specify the provider. 
+ */ + if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) && + strchr(yytext, ':') == NULL) { + + char *p = strchr(yytext, '*'); + char *q = yytext + yyleng - 1; + + if (p != NULL && p > yytext) + *p = '\0'; /* prune yytext */ + + if (dt_type_lookup(yytext, NULL) == 0) { + yylval.l_str = strdup(yytext); + + if (yylval.l_str == NULL) { + longjmp(yypcb->pcb_jmpbuf, + EDT_NOMEM); + } + + if (p != NULL && p > yytext) { + for (*p = '*'; q >= p; q--) + unput(*q); + } + + yybegin(YYS_EXPR); + return (DT_TOK_TNAME); + } + + if (p != NULL && p > yytext) + *p = '*'; /* restore yytext */ + } + + if ((yylval.l_str = strdup(yytext)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + return (DT_TOK_PSPEC); + } + +<S2>"/" return (DT_TOK_DIV); +<S2>"," return (DT_TOK_COMMA); + +<S2>{RGX_WS} ; /* discard */ +<S2>. yyerror("syntax error near \"%c\"\n", yytext[0]); + +<S3>\n { + dt_pragma(yypragma); + yypragma = NULL; + BEGIN(yypcb->pcb_cstate); + } + +<S3>[\f\t\v ]+ ; /* discard */ + +<S3>[^\f\n\t\v "]+ { + dt_node_t *dnp; + + if ((yylval.l_str = strdup(yytext)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * We want to call dt_node_ident() here, but we can't + * because it will expand inlined identifiers, which we + * don't want to do from #pragma context in order to + * support pragmas that apply to the ident itself. We + * call dt_node_string() and then reset dn_op instead. + */ + dnp = dt_node_string(yylval.l_str); + dnp->dn_kind = DT_NODE_IDENT; + dnp->dn_op = DT_TOK_IDENT; + yypragma = dt_node_link(yypragma, dnp); + } + +<S3>. yyerror("syntax error near \"%c\"\n", yytext[0]); + +%% + +/* + * yybegin provides a wrapper for use from C code around the lex BEGIN() macro. + * We use two main states for lexing because probe descriptions use a syntax + * that is incompatible with the normal D tokens (e.g. names can contain "-"). 
+ * yybegin also handles the job of switching between two lists of dt_nodes + * as we allocate persistent definitions, like inlines, and transient nodes + * that will be freed once we are done parsing the current program file. + */ +void +yybegin(yystate_t state) +{ +#ifdef YYDEBUG + yydebug = _dtrace_debug; +#endif + if (yypcb->pcb_yystate == state) + return; /* nothing to do if we're in the state already */ + + if (yypcb->pcb_yystate == YYS_DEFINE) { + yypcb->pcb_list = yypcb->pcb_hold; + yypcb->pcb_hold = NULL; + } + + switch (state) { + case YYS_CLAUSE: + BEGIN(S2); + break; + case YYS_DEFINE: + assert(yypcb->pcb_hold == NULL); + yypcb->pcb_hold = yypcb->pcb_list; + yypcb->pcb_list = NULL; + /*FALLTHRU*/ + case YYS_EXPR: + BEGIN(S0); + break; + case YYS_DONE: + break; + case YYS_CONTROL: + BEGIN(S4); + break; + default: + xyerror(D_UNKNOWN, "internal error -- bad yystate %d\n", state); + } + + yypcb->pcb_yystate = state; +} + +void +yyinit(dt_pcb_t *pcb) +{ + yypcb = pcb; + yylineno = 1; + yypragma = NULL; +#if defined(sun) + yysptr = yysbuf; +#endif +} + +/* + * Given a lexeme 's' (typically yytext), set yylval and return an appropriate + * token to the parser indicating either an identifier or a typedef name. + * User-defined global variables always take precedence over types, but we do + * use some heuristics because D programs can look at an ever-changing set of + * kernel types and also can implicitly instantiate variables by assignment, + * unlike in C. The code here is ordered carefully as lookups are not cheap. + */ +static int +id_or_type(const char *s) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_decl_t *ddp = yypcb->pcb_dstack.ds_decl; + int c0, c1, ttok = DT_TOK_TNAME; + dt_ident_t *idp; + + if ((s = yylval.l_str = strdup(s)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * If the lexeme is a global variable or likely identifier or *not* a + * type_name, then it is an identifier token. 
+ */ + if (dt_idstack_lookup(&yypcb->pcb_globals, s) != NULL || + dt_idhash_lookup(yypcb->pcb_idents, s) != NULL || + dt_type_lookup(s, NULL) != 0) + return (DT_TOK_IDENT); + + /* + * If we're in the midst of parsing a declaration and a type_specifier + * has already been shifted, then return DT_TOK_IDENT instead of TNAME. + * This semantic is necessary to permit valid ISO C code such as: + * + * typedef int foo; + * struct s { foo foo; }; + * + * without causing shift/reduce conflicts in the direct_declarator part + * of the grammar. The result is that we must check for conflicting + * redeclarations of the same identifier as part of dt_node_decl(). + */ + if (ddp != NULL && ddp->dd_name != NULL) + return (DT_TOK_IDENT); + + /* + * If the lexeme is a type name and we are not in a program clause, + * then always interpret it as a type and return DT_TOK_TNAME. + */ + if ((YYSTATE) != S0) + return (DT_TOK_TNAME); + + /* + * If the lexeme matches a type name but is in a program clause, then + * it could be a type or it could be an undefined variable. Peek at + * the next token to decide. If we see ++, --, [, or =, we know there + * might be an assignment that is trying to create a global variable, + * so we optimistically return DT_TOK_IDENT. There is no harm in being + * wrong: a type_name followed by ++, --, [, or = is a syntax error. 
+ */ + while ((c0 = input()) != 0) { + if (strchr("\f\n\r\t\v ", c0) == NULL) + break; + } + + switch (c0) { + case '+': + case '-': + if ((c1 = input()) == c0) + ttok = DT_TOK_IDENT; + unput(c1); + break; + + case '=': + if ((c1 = input()) != c0) + ttok = DT_TOK_IDENT; + unput(c1); + break; + case '[': + ttok = DT_TOK_IDENT; + break; + } + + if (ttok == DT_TOK_IDENT) { + idp = dt_idhash_insert(yypcb->pcb_idents, s, DT_IDENT_SCALAR, 0, + 0, _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen); + + if (idp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + unput(c0); + return (ttok); +} + +#if defined(sun) +static int +input(void) +{ + int c; + + if (yysptr > yysbuf) + c = *--yysptr; + else if (yypcb->pcb_fileptr != NULL) + c = fgetc(yypcb->pcb_fileptr); + else if (yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen) + c = *(unsigned char *)(yypcb->pcb_strptr++); + else + c = EOF; + + if (c == '\n') + yylineno++; + + if (c != EOF) + return (c); + + if ((YYSTATE) == S1) + yyerror("end-of-file encountered before matching */\n"); + + if ((YYSTATE) == S3) + yyerror("end-of-file encountered before end of control line\n"); + + if (yypcb->pcb_fileptr != NULL && ferror(yypcb->pcb_fileptr)) + longjmp(yypcb->pcb_jmpbuf, EDT_FIO); + + return (0); /* EOF */ +} + +static void +unput(int c) +{ + if (c == '\n') + yylineno--; + + *yysptr++ = c; + yytchar = c; +} +#endif diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c new file mode 100644 index 0000000..597fef4 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c @@ -0,0 +1,1906 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#define ELF_TARGET_ALL +#include <elf.h> + +#include <sys/types.h> +#if defined(sun) +#include <sys/sysmacros.h> +#else +#define P2ROUNDUP(x, align) (-(-(x) & -(align))) +#endif + +#include <unistd.h> +#include <strings.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#if defined(sun) +#include <wait.h> +#else +#include <sys/wait.h> +#include <libelf.h> +#include <gelf.h> +#include <sys/mman.h> +#endif +#include <assert.h> +#include <sys/ipc.h> + +#include <dt_impl.h> +#include <dt_provider.h> +#include <dt_program.h> +#include <dt_string.h> + +#define ESHDR_NULL 0 +#define ESHDR_SHSTRTAB 1 +#define ESHDR_DOF 2 +#define ESHDR_STRTAB 3 +#define ESHDR_SYMTAB 4 +#define ESHDR_REL 5 +#define ESHDR_NUM 6 + +#define PWRITE_SCN(index, data) \ + (lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset, SEEK_SET) != \ + (off64_t)elf_file.shdr[(index)].sh_offset || \ + dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) != \ + elf_file.shdr[(index)].sh_size) + +static const char DTRACE_SHSTRTAB32[] = "\0" +".shstrtab\0" /* 1 */ +".SUNW_dof\0" /* 11 */ +".strtab\0" /* 21 */ +".symtab\0" /* 29 */ 
+#ifdef __sparc +".rela.SUNW_dof"; /* 37 */ +#else +".rel.SUNW_dof"; /* 37 */ +#endif + +static const char DTRACE_SHSTRTAB64[] = "\0" +".shstrtab\0" /* 1 */ +".SUNW_dof\0" /* 11 */ +".strtab\0" /* 21 */ +".symtab\0" /* 29 */ +".rela.SUNW_dof"; /* 37 */ + +static const char DOFSTR[] = "__SUNW_dof"; +static const char DOFLAZYSTR[] = "___SUNW_dof"; + +typedef struct dt_link_pair { + struct dt_link_pair *dlp_next; /* next pair in linked list */ + void *dlp_str; /* buffer for string table */ + void *dlp_sym; /* buffer for symbol table */ +} dt_link_pair_t; + +typedef struct dof_elf32 { + uint32_t de_nrel; /* relocation count */ +#ifdef __sparc + Elf32_Rela *de_rel; /* array of relocations for sparc */ +#else + Elf32_Rel *de_rel; /* array of relocations for x86 */ +#endif + uint32_t de_nsym; /* symbol count */ + Elf32_Sym *de_sym; /* array of symbols */ + uint32_t de_strlen; /* size of string table */ + char *de_strtab; /* string table */ + uint32_t de_global; /* index of the first global symbol */ +} dof_elf32_t; + +static int +prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep) +{ + dof_sec_t *dofs, *s; + dof_relohdr_t *dofrh; + dof_relodesc_t *dofr; + char *strtab; + int i, j, nrel; + size_t strtabsz = 1; + uint32_t count = 0; + size_t base; + Elf32_Sym *sym; +#ifdef __sparc + Elf32_Rela *rel; +#else + Elf32_Rel *rel; +#endif + + /*LINTED*/ + dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); + + /* + * First compute the size of the string table and the number of + * relocations present in the DOF. 
+ */ + for (i = 0; i < dof->dofh_secnum; i++) { + if (dofs[i].dofs_type != DOF_SECT_URELHDR) + continue; + + /*LINTED*/ + dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); + + s = &dofs[dofrh->dofr_strtab]; + strtab = (char *)dof + s->dofs_offset; + assert(strtab[0] == '\0'); + strtabsz += s->dofs_size - 1; + + s = &dofs[dofrh->dofr_relsec]; + /*LINTED*/ + dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); + count += s->dofs_size / s->dofs_entsize; + } + + dep->de_strlen = strtabsz; + dep->de_nrel = count; + dep->de_nsym = count + 1; /* the first symbol is always null */ + + if (dtp->dt_lazyload) { + dep->de_strlen += sizeof (DOFLAZYSTR); + dep->de_nsym++; + } else { + dep->de_strlen += sizeof (DOFSTR); + dep->de_nsym++; + } + + if ((dep->de_rel = calloc(dep->de_nrel, + sizeof (dep->de_rel[0]))) == NULL) { + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) { + free(dep->de_rel); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { + free(dep->de_rel); + free(dep->de_sym); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + count = 0; + strtabsz = 1; + dep->de_strtab[0] = '\0'; + rel = dep->de_rel; + sym = dep->de_sym; + dep->de_global = 1; + + /* + * The first symbol table entry must be zeroed and is always ignored. + */ + bzero(sym, sizeof (Elf32_Sym)); + sym++; + + /* + * Take a second pass through the DOF sections filling in the + * memory we allocated. 
+ */ + for (i = 0; i < dof->dofh_secnum; i++) { + if (dofs[i].dofs_type != DOF_SECT_URELHDR) + continue; + + /*LINTED*/ + dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); + + s = &dofs[dofrh->dofr_strtab]; + strtab = (char *)dof + s->dofs_offset; + bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); + base = strtabsz; + strtabsz += s->dofs_size - 1; + + s = &dofs[dofrh->dofr_relsec]; + /*LINTED*/ + dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); + nrel = s->dofs_size / s->dofs_entsize; + + s = &dofs[dofrh->dofr_tgtsec]; + + for (j = 0; j < nrel; j++) { +#if defined(__arm__) +/* XXX */ +printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); +#elif defined(__i386) || defined(__amd64) + rel->r_offset = s->dofs_offset + + dofr[j].dofr_offset; + rel->r_info = ELF32_R_INFO(count + dep->de_global, + R_386_32); +#elif defined(__mips__) +/* XXX */ +printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); +#elif defined(__powerpc__) + /* + * Add 4 bytes to hit the low half of this 64-bit + * big-endian address. + */ + rel->r_offset = s->dofs_offset + + dofr[j].dofr_offset + 4; + rel->r_info = ELF32_R_INFO(count + dep->de_global, + R_PPC_REL32); +#elif defined(__sparc) + /* + * Add 4 bytes to hit the low half of this 64-bit + * big-endian address. + */ + rel->r_offset = s->dofs_offset + + dofr[j].dofr_offset + 4; + rel->r_info = ELF32_R_INFO(count + dep->de_global, + R_SPARC_32); +#else +#error unknown ISA +#endif + + sym->st_name = base + dofr[j].dofr_name - 1; + sym->st_value = 0; + sym->st_size = 0; + sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); + sym->st_other = 0; + sym->st_shndx = SHN_UNDEF; + + rel++; + sym++; + count++; + } + } + + /* + * Add a symbol for the DOF itself. We use a different symbol for + * lazily and actively loaded DOF to make them easy to distinguish. 
+ */ + sym->st_name = strtabsz; + sym->st_value = 0; + sym->st_size = dof->dofh_filesz; + sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT); + sym->st_other = 0; + sym->st_shndx = ESHDR_DOF; + sym++; + + if (dtp->dt_lazyload) { + bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, + sizeof (DOFLAZYSTR)); + strtabsz += sizeof (DOFLAZYSTR); + } else { + bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); + strtabsz += sizeof (DOFSTR); + } + + assert(count == dep->de_nrel); + assert(strtabsz == dep->de_strlen); + + return (0); +} + + +typedef struct dof_elf64 { + uint32_t de_nrel; + Elf64_Rela *de_rel; + uint32_t de_nsym; + Elf64_Sym *de_sym; + + uint32_t de_strlen; + char *de_strtab; + + uint32_t de_global; +} dof_elf64_t; + +static int +prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep) +{ + dof_sec_t *dofs, *s; + dof_relohdr_t *dofrh; + dof_relodesc_t *dofr; + char *strtab; + int i, j, nrel; + size_t strtabsz = 1; +#if defined(sun) + uint32_t count = 0; +#else + uint64_t count = 0; +#endif + size_t base; + Elf64_Sym *sym; + Elf64_Rela *rel; + + /*LINTED*/ + dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); + + /* + * First compute the size of the string table and the number of + * relocations present in the DOF. 
+ */ + for (i = 0; i < dof->dofh_secnum; i++) { + if (dofs[i].dofs_type != DOF_SECT_URELHDR) + continue; + + /*LINTED*/ + dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); + + s = &dofs[dofrh->dofr_strtab]; + strtab = (char *)dof + s->dofs_offset; + assert(strtab[0] == '\0'); + strtabsz += s->dofs_size - 1; + + s = &dofs[dofrh->dofr_relsec]; + /*LINTED*/ + dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); + count += s->dofs_size / s->dofs_entsize; + } + + dep->de_strlen = strtabsz; + dep->de_nrel = count; + dep->de_nsym = count + 1; /* the first symbol is always null */ + + if (dtp->dt_lazyload) { + dep->de_strlen += sizeof (DOFLAZYSTR); + dep->de_nsym++; + } else { + dep->de_strlen += sizeof (DOFSTR); + dep->de_nsym++; + } + + if ((dep->de_rel = calloc(dep->de_nrel, + sizeof (dep->de_rel[0]))) == NULL) { + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) { + free(dep->de_rel); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { + free(dep->de_rel); + free(dep->de_sym); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + count = 0; + strtabsz = 1; + dep->de_strtab[0] = '\0'; + rel = dep->de_rel; + sym = dep->de_sym; + dep->de_global = 1; + + /* + * The first symbol table entry must be zeroed and is always ignored. + */ + bzero(sym, sizeof (Elf64_Sym)); + sym++; + + /* + * Take a second pass through the DOF sections filling in the + * memory we allocated. 
+ */ + for (i = 0; i < dof->dofh_secnum; i++) { + if (dofs[i].dofs_type != DOF_SECT_URELHDR) + continue; + + /*LINTED*/ + dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); + + s = &dofs[dofrh->dofr_strtab]; + strtab = (char *)dof + s->dofs_offset; + bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); + base = strtabsz; + strtabsz += s->dofs_size - 1; + + s = &dofs[dofrh->dofr_relsec]; + /*LINTED*/ + dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); + nrel = s->dofs_size / s->dofs_entsize; + + s = &dofs[dofrh->dofr_tgtsec]; + + for (j = 0; j < nrel; j++) { +#if defined(__arm__) +/* XXX */ +#elif defined(__mips__) +/* XXX */ +#elif defined(__powerpc__) + rel->r_offset = s->dofs_offset + + dofr[j].dofr_offset; + rel->r_info = ELF64_R_INFO(count + dep->de_global, + R_PPC64_REL64); +#elif defined(__i386) || defined(__amd64) + rel->r_offset = s->dofs_offset + + dofr[j].dofr_offset; +#if defined(sun) + rel->r_info = ELF64_R_INFO(count + dep->de_global, + R_AMD64_64); +#else + rel->r_info = ELF64_R_INFO(count + dep->de_global, + R_X86_64_RELATIVE); +#endif +#elif defined(__sparc) + rel->r_offset = s->dofs_offset + + dofr[j].dofr_offset; + rel->r_info = ELF64_R_INFO(count + dep->de_global, + R_SPARC_64); +#else +#error unknown ISA +#endif + + sym->st_name = base + dofr[j].dofr_name - 1; + sym->st_value = 0; + sym->st_size = 0; + sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC); + sym->st_other = 0; + sym->st_shndx = SHN_UNDEF; + + rel++; + sym++; + count++; + } + } + + /* + * Add a symbol for the DOF itself. We use a different symbol for + * lazily and actively loaded DOF to make them easy to distinguish. 
+ */ + sym->st_name = strtabsz; + sym->st_value = 0; + sym->st_size = dof->dofh_filesz; + sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT); + sym->st_other = 0; + sym->st_shndx = ESHDR_DOF; + sym++; + + if (dtp->dt_lazyload) { + bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, + sizeof (DOFLAZYSTR)); + strtabsz += sizeof (DOFLAZYSTR); + } else { + bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); + strtabsz += sizeof (DOFSTR); + } + + assert(count == dep->de_nrel); + assert(strtabsz == dep->de_strlen); + + return (0); +} + +/* + * Write out an ELF32 file prologue consisting of a header, section headers, + * and a section header string table. The DOF data will follow this prologue + * and complete the contents of the given ELF file. + */ +static int +dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) +{ + struct { + Elf32_Ehdr ehdr; + Elf32_Shdr shdr[ESHDR_NUM]; + } elf_file; + + Elf32_Shdr *shp; + Elf32_Off off; + dof_elf32_t de; + int ret = 0; + uint_t nshdr; + + if (prepare_elf32(dtp, dof, &de) != 0) + return (-1); /* errno is set for us */ + + /* + * If there are no relocations, the relocation section is omitted and + * we only need the sections up to and including the symtab. + */ + nshdr = de.de_nrel == 0 ? 
ESHDR_SYMTAB + 1 : ESHDR_NUM; + + bzero(&elf_file, sizeof (elf_file)); + + elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; + elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; + elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; + elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; + elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; + elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32; +#if BYTE_ORDER == _BIG_ENDIAN + elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; +#else + elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; +#endif +#if defined(__FreeBSD__) + elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; +#endif + elf_file.ehdr.e_type = ET_REL; +#if defined(__arm__) + elf_file.ehdr.e_machine = EM_ARM; +#elif defined(__mips__) + elf_file.ehdr.e_machine = EM_MIPS; +#elif defined(__powerpc__) + elf_file.ehdr.e_machine = EM_PPC; +#elif defined(__sparc) + elf_file.ehdr.e_machine = EM_SPARC; +#elif defined(__i386) || defined(__amd64) + elf_file.ehdr.e_machine = EM_386; +#endif + elf_file.ehdr.e_version = EV_CURRENT; + elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr); + elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr); + elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr); + elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr); + elf_file.ehdr.e_shnum = nshdr; + elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; + off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr); + + shp = &elf_file.shdr[ESHDR_SHSTRTAB]; + shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */ + shp->sh_type = SHT_STRTAB; + shp->sh_offset = off; + shp->sh_size = sizeof (DTRACE_SHSTRTAB32); + shp->sh_addralign = sizeof (char); + off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); + + shp = &elf_file.shdr[ESHDR_DOF]; + shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */ + shp->sh_flags = SHF_ALLOC; + shp->sh_type = SHT_SUNW_dof; + shp->sh_offset = off; + shp->sh_size = dof->dofh_filesz; + shp->sh_addralign = 8; + off = shp->sh_offset + shp->sh_size; + + shp = &elf_file.shdr[ESHDR_STRTAB]; + shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */ + 
shp->sh_flags = SHF_ALLOC; + shp->sh_type = SHT_STRTAB; + shp->sh_offset = off; + shp->sh_size = de.de_strlen; + shp->sh_addralign = sizeof (char); + off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); + + shp = &elf_file.shdr[ESHDR_SYMTAB]; + shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */ + shp->sh_flags = SHF_ALLOC; + shp->sh_type = SHT_SYMTAB; + shp->sh_entsize = sizeof (Elf32_Sym); + shp->sh_link = ESHDR_STRTAB; + shp->sh_offset = off; + shp->sh_info = de.de_global; + shp->sh_size = de.de_nsym * sizeof (Elf32_Sym); + shp->sh_addralign = 4; + off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); + + if (de.de_nrel == 0) { + if (dt_write(dtp, fd, &elf_file, + sizeof (elf_file)) != sizeof (elf_file) || + PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || + PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || + PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || + PWRITE_SCN(ESHDR_DOF, dof)) { + ret = dt_set_errno(dtp, errno); + } + } else { + shp = &elf_file.shdr[ESHDR_REL]; + shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */ + shp->sh_flags = SHF_ALLOC; +#ifdef __sparc + shp->sh_type = SHT_RELA; +#else + shp->sh_type = SHT_REL; +#endif + shp->sh_entsize = sizeof (de.de_rel[0]); + shp->sh_link = ESHDR_SYMTAB; + shp->sh_info = ESHDR_DOF; + shp->sh_offset = off; + shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); + shp->sh_addralign = 4; + + if (dt_write(dtp, fd, &elf_file, + sizeof (elf_file)) != sizeof (elf_file) || + PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || + PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || + PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || + PWRITE_SCN(ESHDR_REL, de.de_rel) || + PWRITE_SCN(ESHDR_DOF, dof)) { + ret = dt_set_errno(dtp, errno); + } + } + + free(de.de_strtab); + free(de.de_sym); + free(de.de_rel); + + return (ret); +} + +/* + * Write out an ELF64 file prologue consisting of a header, section headers, + * and a section header string table. The DOF data will follow this prologue + * and complete the contents of the given ELF file. 
+ */ +static int +dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) +{ + struct { + Elf64_Ehdr ehdr; + Elf64_Shdr shdr[ESHDR_NUM]; + } elf_file; + + Elf64_Shdr *shp; + Elf64_Off off; + dof_elf64_t de; + int ret = 0; + uint_t nshdr; + + if (prepare_elf64(dtp, dof, &de) != 0) + return (-1); /* errno is set for us */ + + /* + * If there are no relocations, the relocation section is omitted and + * we only need the sections up to and including the symtab. + */ + nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM; + + bzero(&elf_file, sizeof (elf_file)); + + elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; + elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; + elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; + elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; + elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; + elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64; +#if BYTE_ORDER == _BIG_ENDIAN + elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; +#else + elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; +#endif +#if defined(__FreeBSD__) + elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; +#endif + elf_file.ehdr.e_type = ET_REL; +#if defined(__arm__) + elf_file.ehdr.e_machine = EM_ARM; +#elif defined(__mips__) + elf_file.ehdr.e_machine = EM_MIPS; +#elif defined(__powerpc__) + elf_file.ehdr.e_machine = EM_PPC; +#elif defined(__sparc) + elf_file.ehdr.e_machine = EM_SPARCV9; +#elif defined(__i386) || defined(__amd64) + elf_file.ehdr.e_machine = EM_AMD64; +#endif + elf_file.ehdr.e_version = EV_CURRENT; + elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr); + elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr); + elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr); + elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr); + elf_file.ehdr.e_shnum = nshdr; + elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; + off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr); + + shp = &elf_file.shdr[ESHDR_SHSTRTAB]; + shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */ + shp->sh_type = SHT_STRTAB; + shp->sh_offset = off; + shp->sh_size = sizeof (DTRACE_SHSTRTAB64); + 
shp->sh_addralign = sizeof (char); + off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); + + shp = &elf_file.shdr[ESHDR_DOF]; + shp->sh_name = 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */ +#if defined(sun) + shp->sh_flags = SHF_ALLOC; +#else + shp->sh_flags = SHF_WRITE | SHF_ALLOC; +#endif + shp->sh_type = SHT_SUNW_dof; + shp->sh_offset = off; + shp->sh_size = dof->dofh_filesz; + shp->sh_addralign = 8; + off = shp->sh_offset + shp->sh_size; + + shp = &elf_file.shdr[ESHDR_STRTAB]; + shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */ + shp->sh_flags = SHF_ALLOC; + shp->sh_type = SHT_STRTAB; + shp->sh_offset = off; + shp->sh_size = de.de_strlen; + shp->sh_addralign = sizeof (char); + off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); + + shp = &elf_file.shdr[ESHDR_SYMTAB]; + shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */ + shp->sh_flags = SHF_ALLOC; + shp->sh_type = SHT_SYMTAB; + shp->sh_entsize = sizeof (Elf64_Sym); + shp->sh_link = ESHDR_STRTAB; + shp->sh_offset = off; + shp->sh_info = de.de_global; + shp->sh_size = de.de_nsym * sizeof (Elf64_Sym); + shp->sh_addralign = 8; + off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); + + if (de.de_nrel == 0) { + if (dt_write(dtp, fd, &elf_file, + sizeof (elf_file)) != sizeof (elf_file) || + PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || + PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || + PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || + PWRITE_SCN(ESHDR_DOF, dof)) { + ret = dt_set_errno(dtp, errno); + } + } else { + shp = &elf_file.shdr[ESHDR_REL]; + shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rela.SUNW_dof" */ + shp->sh_flags = SHF_ALLOC; + shp->sh_type = SHT_RELA; + shp->sh_entsize = sizeof (de.de_rel[0]); + shp->sh_link = ESHDR_SYMTAB; + shp->sh_info = ESHDR_DOF; + shp->sh_offset = off; + shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); + shp->sh_addralign = 8; + + if (dt_write(dtp, fd, &elf_file, + sizeof (elf_file)) != sizeof (elf_file) || + PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || + 
PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || + PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || + PWRITE_SCN(ESHDR_REL, de.de_rel) || + PWRITE_SCN(ESHDR_DOF, dof)) { + ret = dt_set_errno(dtp, errno); + } + } + + free(de.de_strtab); + free(de.de_sym); + free(de.de_rel); + + return (ret); +} + +static int +dt_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint_t shn, + GElf_Sym *sym) +{ + int i, ret = -1; + GElf_Sym s; + + for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) { + if (GELF_ST_TYPE(sym->st_info) == STT_FUNC && + shn == sym->st_shndx && + sym->st_value <= addr && + addr < sym->st_value + sym->st_size) { + if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL) + return (0); + + ret = 0; + s = *sym; + } + } + + if (ret == 0) + *sym = s; + return (ret); +} + +#if defined(__arm__) +/* XXX */ +static int +dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, + uint32_t *off) +{ +printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + return (0); +} +#elif defined(__mips__) +/* XXX */ +static int +dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, + uint32_t *off) +{ +printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); + return (0); +} +#elif defined(__powerpc__) +/* The sentinel is 'xor r3,r3,r3'. */ +#define DT_OP_XOR_R3 0x7c631a78 + +#define DT_OP_NOP 0x60000000 +#define DT_OP_BLR 0x4e800020 + +/* This captures all forms of branching to address. */ +#define DT_IS_BRANCH(inst) ((inst & 0xfc000000) == 0x48000000) +#define DT_IS_BL(inst) (DT_IS_BRANCH(inst) && (inst & 0x01)) + +/* XXX */ +static int +dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, + uint32_t *off) +{ + uint32_t *ip; + + if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) + return (-1); + + /*LINTED*/ + ip = (uint32_t *)(p + rela->r_offset); + + /* + * We only know about some specific relocation types. 
+ */ + if (GELF_R_TYPE(rela->r_info) != R_PPC_REL24 && + GELF_R_TYPE(rela->r_info) != R_PPC_PLTREL24) + return (-1); + + /* + * We may have already processed this object file in an earlier linker + * invocation. Check to see if the present instruction sequence matches + * the one we would install below. + */ + if (isenabled) { + if (ip[0] == DT_OP_XOR_R3) { + (*off) += sizeof (ip[0]); + return (0); + } + } else { + if (ip[0] == DT_OP_NOP) { + (*off) += sizeof (ip[0]); + return (0); + } + } + + /* + * We only expect branch to address instructions. + */ + if (!DT_IS_BRANCH(ip[0])) { + dt_dprintf("found %x instead of a branch instruction at %llx\n", + ip[0], (u_longlong_t)rela->r_offset); + return (-1); + } + + if (isenabled) { + /* + * It would necessarily indicate incorrect usage if an is- + * enabled probe were tail-called so flag that as an error. + * It's also potentially (very) tricky to handle gracefully, + * but could be done if this were a desired use scenario. + */ + if (!DT_IS_BL(ip[0])) { + dt_dprintf("tail call to is-enabled probe at %llx\n", + (u_longlong_t)rela->r_offset); + return (-1); + } + + ip[0] = DT_OP_XOR_R3; + (*off) += sizeof (ip[0]); + } else { + if (DT_IS_BL(ip[0])) + ip[0] = DT_OP_NOP; + else + ip[0] = DT_OP_BLR; + } + + return (0); +} + +#elif defined(__sparc) + +#define DT_OP_RET 0x81c7e008 +#define DT_OP_NOP 0x01000000 +#define DT_OP_CALL 0x40000000 +#define DT_OP_CLR_O0 0x90102000 + +#define DT_IS_MOV_O7(inst) (((inst) & 0xffffe000) == 0x9e100000) +#define DT_IS_RESTORE(inst) (((inst) & 0xc1f80000) == 0x81e80000) +#define DT_IS_RETL(inst) (((inst) & 0xfff83fff) == 0x81c02008) + +#define DT_RS2(inst) ((inst) & 0x1f) +#define DT_MAKE_RETL(reg) (0x81c02008 | ((reg) << 14)) + +/*ARGSUSED*/ +static int +dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, + uint32_t *off) +{ + uint32_t *ip; + + if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) + return (-1); + + /*LINTED*/ + ip = (uint32_t *)(p + rela->r_offset); + + /* 
+ * We only know about some specific relocation types. + */ + if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 && + GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30) + return (-1); + + /* + * We may have already processed this object file in an earlier linker + * invocation. Check to see if the present instruction sequence matches + * the one we would install below. + */ + if (isenabled) { + if (ip[0] == DT_OP_NOP) { + (*off) += sizeof (ip[0]); + return (0); + } + } else { + if (DT_IS_RESTORE(ip[1])) { + if (ip[0] == DT_OP_RET) { + (*off) += sizeof (ip[0]); + return (0); + } + } else if (DT_IS_MOV_O7(ip[1])) { + if (DT_IS_RETL(ip[0])) + return (0); + } else { + if (ip[0] == DT_OP_NOP) { + (*off) += sizeof (ip[0]); + return (0); + } + } + } + + /* + * We only expect call instructions with a displacement of 0. + */ + if (ip[0] != DT_OP_CALL) { + dt_dprintf("found %x instead of a call instruction at %llx\n", + ip[0], (u_longlong_t)rela->r_offset); + return (-1); + } + + if (isenabled) { + /* + * It would necessarily indicate incorrect usage if an is- + * enabled probe were tail-called so flag that as an error. + * It's also potentially (very) tricky to handle gracefully, + * but could be done if this were a desired use scenario. + */ + if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) { + dt_dprintf("tail call to is-enabled probe at %llx\n", + (u_longlong_t)rela->r_offset); + return (-1); + } + + + /* + * On SPARC, we take advantage of the fact that the first + * argument shares the same register as for the return value. + * The macro handles the work of zeroing that register so we + * don't need to do anything special here. We instrument the + * instruction in the delay slot as we'll need to modify the + * return register after that instruction has been emulated. + */ + ip[0] = DT_OP_NOP; + (*off) += sizeof (ip[0]); + } else { + /* + * If the call is followed by a restore, it's a tail call so + * change the call to a ret. 
If the call is followed by a mov + * of a register into %o7, it's a tail call in leaf context + * so change the call to a retl-like instruction that returns + * to that register value + 8 (rather than the typical %o7 + + * 8); the delay slot instruction is left, but should have no + * effect. Otherwise we change the call to be a nop. We + * identify the subsequent instruction as the probe point in + * all but the leaf tail-call case to ensure that arguments to + * the probe are complete and consistent. An astute, though + * largely hypothetical, observer would note that there is the + * possibility of a false-positive probe firing if the function + * contained a branch to the instruction in the delay slot of + * the call. Fixing this would require significant in-kernel + * modifications, and isn't worth doing until we see it in the + * wild. + */ + if (DT_IS_RESTORE(ip[1])) { + ip[0] = DT_OP_RET; + (*off) += sizeof (ip[0]); + } else if (DT_IS_MOV_O7(ip[1])) { + ip[0] = DT_MAKE_RETL(DT_RS2(ip[1])); + } else { + ip[0] = DT_OP_NOP; + (*off) += sizeof (ip[0]); + } + } + + return (0); +} + +#elif defined(__i386) || defined(__amd64) + +#define DT_OP_NOP 0x90 +#define DT_OP_RET 0xc3 +#define DT_OP_CALL 0xe8 +#define DT_OP_JMP32 0xe9 +#define DT_OP_REX_RAX 0x48 +#define DT_OP_XOR_EAX_0 0x33 +#define DT_OP_XOR_EAX_1 0xc0 + +static int +dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, + uint32_t *off) +{ + uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1); + uint8_t ret; + + /* + * On x86, the first byte of the instruction is the call opcode and + * the next four bytes are the 32-bit address; the relocation is for + * the address operand. We back up the offset to the first byte of + * the instruction. For is-enabled probes, we later advance the offset + * so that it hits the first nop in the instruction sequence. + */ + (*off) -= 1; + + /* + * We only know about some specific relocation types.
Luckily + * these types have the same values on both 32-bit and 64-bit + * x86 architectures. + */ + if (GELF_R_TYPE(rela->r_info) != R_386_PC32 && + GELF_R_TYPE(rela->r_info) != R_386_PLT32) + return (-1); + + /* + * We may have already processed this object file in an earlier linker + * invocation. Check to see if the present instruction sequence matches + * the one we would install. For is-enabled probes, we advance the + * offset to the first nop instruction in the sequence to match the + * text modification code below. + */ + if (!isenabled) { + if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) && + ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP && + ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) + return (0); + } else if (dtp->dt_oflags & DTRACE_O_LP64) { + if (ip[0] == DT_OP_REX_RAX && + ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 && + (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) && + ip[4] == DT_OP_NOP) { + (*off) += 3; + return (0); + } + } else { + if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 && + (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) && + ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) { + (*off) += 2; + return (0); + } + } + + /* + * We expect either a call instruction with a 32-bit displacement or a + * jmp instruction with a 32-bit displacement acting as a tail-call. + */ + if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) { + dt_dprintf("found %x instead of a call or jmp instruction at " + "%llx\n", ip[0], (u_longlong_t)rela->r_offset); + return (-1); + } + + ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP; + + /* + * Establish the instruction sequence -- all nops for probes, and an + * instruction to clear the return value register (%eax/%rax) followed + * by nops for is-enabled probes. For is-enabled probes, we advance + * the offset to the first nop. This isn't strictly necessary but makes + * for more readable disassembly when the probe is enabled.
+ */ + if (!isenabled) { + ip[0] = ret; + ip[1] = DT_OP_NOP; + ip[2] = DT_OP_NOP; + ip[3] = DT_OP_NOP; + ip[4] = DT_OP_NOP; + } else if (dtp->dt_oflags & DTRACE_O_LP64) { + ip[0] = DT_OP_REX_RAX; + ip[1] = DT_OP_XOR_EAX_0; + ip[2] = DT_OP_XOR_EAX_1; + ip[3] = ret; + ip[4] = DT_OP_NOP; + (*off) += 3; + } else { + ip[0] = DT_OP_XOR_EAX_0; + ip[1] = DT_OP_XOR_EAX_1; + ip[2] = ret; + ip[3] = DT_OP_NOP; + ip[4] = DT_OP_NOP; + (*off) += 2; + } + + return (0); +} + +#else +#error unknown ISA +#endif + +/*PRINTFLIKE5*/ +static int +dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs, + const char *format, ...) +{ + va_list ap; + dt_link_pair_t *pair; + + va_start(ap, format); + dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap); + va_end(ap); + + if (elf != NULL) + (void) elf_end(elf); + + if (fd >= 0) + (void) close(fd); + + while ((pair = bufs) != NULL) { + bufs = pair->dlp_next; + dt_free(dtp, pair->dlp_str); + dt_free(dtp, pair->dlp_sym); + dt_free(dtp, pair); + } + + return (dt_set_errno(dtp, EDT_COMPILER)); +} + +static int +process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp) +{ + static const char dt_prefix[] = "__dtrace"; + static const char dt_enabled[] = "enabled"; + static const char dt_symprefix[] = "$dtrace"; + static const char dt_symfmt[] = "%s%ld.%s"; + int fd, i, ndx, eprobe, mod = 0; + Elf *elf = NULL; + GElf_Ehdr ehdr; + Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt; + Elf_Data *data_rel, *data_sym, *data_str, *data_tgt; + GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt; + GElf_Sym rsym, fsym, dsym; + GElf_Rela rela; + char *s, *p, *r; + char pname[DTRACE_PROVNAMELEN]; + dt_provider_t *pvp; + dt_probe_t *prp; + uint32_t off, eclass, emachine1, emachine2; + size_t symsize, nsym, isym, istr, len; + key_t objkey; + dt_link_pair_t *pair, *bufs = NULL; + dt_strtab_t *strtab; + + if ((fd = open64(obj, O_RDWR)) == -1) { + return (dt_link_error(dtp, elf, fd, bufs, + "failed to open %s: %s", obj, strerror(errno))); + } + + if 
((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) { + return (dt_link_error(dtp, elf, fd, bufs, + "failed to process %s: %s", obj, elf_errmsg(elf_errno()))); + } + + switch (elf_kind(elf)) { + case ELF_K_ELF: + break; + case ELF_K_AR: + return (dt_link_error(dtp, elf, fd, bufs, "archives are not " + "permitted; use the contents of the archive instead: %s", + obj)); + default: + return (dt_link_error(dtp, elf, fd, bufs, + "invalid file type: %s", obj)); + } + + if (gelf_getehdr(elf, &ehdr) == NULL) { + return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s", + obj)); + } + + if (dtp->dt_oflags & DTRACE_O_LP64) { + eclass = ELFCLASS64; +#if defined(__mips__) + emachine1 = emachine2 = EM_MIPS; +#elif defined(__powerpc__) + emachine1 = emachine2 = EM_PPC64; +#elif defined(__sparc) + emachine1 = emachine2 = EM_SPARCV9; +#elif defined(__i386) || defined(__amd64) + emachine1 = emachine2 = EM_AMD64; +#endif + symsize = sizeof (Elf64_Sym); + } else { + eclass = ELFCLASS32; +#if defined(__arm__) + emachine1 = emachine2 = EM_ARM; +#elif defined(__mips__) + emachine1 = emachine2 = EM_MIPS; +#elif defined(__powerpc__) + emachine1 = emachine2 = EM_PPC; +#elif defined(__sparc) + emachine1 = EM_SPARC; + emachine2 = EM_SPARC32PLUS; +#elif defined(__i386) || defined(__amd64) + emachine1 = emachine2 = EM_386; +#endif + symsize = sizeof (Elf32_Sym); + } + + if (ehdr.e_ident[EI_CLASS] != eclass) { + return (dt_link_error(dtp, elf, fd, bufs, + "incorrect ELF class for object file: %s", obj)); + } + + if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) { + return (dt_link_error(dtp, elf, fd, bufs, + "incorrect ELF machine type for object file: %s", obj)); + } + + /* + * We use this token as a relatively unique handle for this file on the + * system in order to disambiguate potential conflicts between files of + * the same name which contain identically named local symbols.
+ */ + if ((objkey = ftok(obj, 0)) == (key_t)-1) { + return (dt_link_error(dtp, elf, fd, bufs, + "failed to generate unique key for object file: %s", obj)); + } + + scn_rel = NULL; + while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) { + if (gelf_getshdr(scn_rel, &shdr_rel) == NULL) + goto err; + + /* + * Skip any non-relocation sections. + */ + if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL) + continue; + + if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL) + goto err; + + /* + * Grab the section, section header and section data for the + * symbol table that this relocation section references. + */ + if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL || + gelf_getshdr(scn_sym, &shdr_sym) == NULL || + (data_sym = elf_getdata(scn_sym, NULL)) == NULL) + goto err; + + /* + * Ditto for that symbol table's string table. + */ + if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL || + gelf_getshdr(scn_str, &shdr_str) == NULL || + (data_str = elf_getdata(scn_str, NULL)) == NULL) + goto err; + + /* + * Grab the section, section header and section data for the + * target section for the relocations. For the relocations + * we're looking for -- this will typically be the text of the + * object file. + */ + if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL || + gelf_getshdr(scn_tgt, &shdr_tgt) == NULL || + (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL) + goto err; + + /* + * We're looking for relocations to symbols matching this form: + * + * __dtrace[enabled]_<prov>___<probe> + * + * For the generated object, we need to record the location + * identified by the relocation, and create a new relocation + * in the generated object that will be resolved at link time + * to the location of the function in which the probe is + * embedded. In the target object, we change the matched symbol + * so that it will be ignored at link time, and we modify the + * target (text) section to replace the call instruction with + * one or more nops. 
+ * + * If the function containing the probe is locally scoped + * (static), we create an alias used by the relocation in the + * generated object. The alias, a new symbol, will be global + * (so that the relocation from the generated object can be + * resolved), and hidden (so that it is converted to a local + * symbol at link time). Such aliases have this form: + * + * $dtrace<key>.<function> + * + * We take a first pass through all the relocations to + * populate our string table and count the number of extra + * symbols we'll require. + */ + strtab = dt_strtab_create(1); + nsym = 0; + isym = data_sym->d_size / symsize; + istr = data_str->d_size; + + for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { + + if (shdr_rel.sh_type == SHT_RELA) { + if (gelf_getrela(data_rel, i, &rela) == NULL) + continue; + } else { + GElf_Rel rel; + if (gelf_getrel(data_rel, i, &rel) == NULL) + continue; + rela.r_offset = rel.r_offset; + rela.r_info = rel.r_info; + rela.r_addend = 0; + } + + if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info), + &rsym) == NULL) { + dt_strtab_destroy(strtab); + goto err; + } + + s = (char *)data_str->d_buf + rsym.st_name; + + if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0) + continue; + + if (dt_symtab_lookup(data_sym, isym, rela.r_offset, + shdr_rel.sh_info, &fsym) != 0) { + dt_strtab_destroy(strtab); + goto err; + } + + if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL) + continue; + + if (fsym.st_name > data_str->d_size) { + dt_strtab_destroy(strtab); + goto err; + } + + s = (char *)data_str->d_buf + fsym.st_name; + + /* + * If this symbol isn't of type function, we've really + * driven off the rails or the object file is corrupt. 
+ */ + if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) { + dt_strtab_destroy(strtab); + return (dt_link_error(dtp, elf, fd, bufs, + "expected %s to be of type function", s)); + } + + len = snprintf(NULL, 0, dt_symfmt, dt_symprefix, + objkey, s) + 1; + if ((p = dt_alloc(dtp, len)) == NULL) { + dt_strtab_destroy(strtab); + goto err; + } + (void) snprintf(p, len, dt_symfmt, dt_symprefix, + objkey, s); + + if (dt_strtab_index(strtab, p) == -1) { + nsym++; + (void) dt_strtab_insert(strtab, p); + } + + dt_free(dtp, p); + } + + /* + * If needed, allocate the additional space for the symbol + * table and string table copying the old data into the new + * buffers, and marking the buffers as dirty. We inject those + * newly allocated buffers into the libelf data structures, but + * are still responsible for freeing them once we're done with + * the elf handle. + */ + if (nsym > 0) { + /* + * The first byte of the string table is reserved for + * the \0 entry. + */ + len = dt_strtab_size(strtab) - 1; + + assert(len > 0); + assert(dt_strtab_index(strtab, "") == 0); + + dt_strtab_destroy(strtab); + + if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL) + goto err; + + if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size + + len)) == NULL) { + dt_free(dtp, pair); + goto err; + } + + if ((pair->dlp_sym = dt_alloc(dtp, data_sym->d_size + + nsym * symsize)) == NULL) { + dt_free(dtp, pair->dlp_str); + dt_free(dtp, pair); + goto err; + } + + pair->dlp_next = bufs; + bufs = pair; + + bcopy(data_str->d_buf, pair->dlp_str, data_str->d_size); + data_str->d_buf = pair->dlp_str; + data_str->d_size += len; + (void) elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY); + + shdr_str.sh_size += len; + (void) gelf_update_shdr(scn_str, &shdr_str); + + bcopy(data_sym->d_buf, pair->dlp_sym, data_sym->d_size); + data_sym->d_buf = pair->dlp_sym; + data_sym->d_size += nsym * symsize; + (void) elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY); + + shdr_sym.sh_size += nsym * symsize; + (void) 
gelf_update_shdr(scn_sym, &shdr_sym); + + nsym += isym; + } else { + dt_strtab_destroy(strtab); + } + + /* + * Now that the tables have been allocated, perform the + * modifications described above. + */ + for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { + + if (shdr_rel.sh_type == SHT_RELA) { + if (gelf_getrela(data_rel, i, &rela) == NULL) + continue; + } else { + GElf_Rel rel; + if (gelf_getrel(data_rel, i, &rel) == NULL) + continue; + rela.r_offset = rel.r_offset; + rela.r_info = rel.r_info; + rela.r_addend = 0; + } + + ndx = GELF_R_SYM(rela.r_info); + + if (gelf_getsym(data_sym, ndx, &rsym) == NULL || + rsym.st_name > data_str->d_size) + goto err; + + s = (char *)data_str->d_buf + rsym.st_name; + + if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0) + continue; + + s += sizeof (dt_prefix) - 1; + + /* + * Check to see if this is an 'is-enabled' check as + * opposed to a normal probe. + */ + if (strncmp(s, dt_enabled, + sizeof (dt_enabled) - 1) == 0) { + s += sizeof (dt_enabled) - 1; + eprobe = 1; + *eprobesp = 1; + dt_dprintf("is-enabled probe\n"); + } else { + eprobe = 0; + dt_dprintf("normal probe\n"); + } + + if (*s++ != '_') + goto err; + + if ((p = strstr(s, "___")) == NULL || + p - s >= sizeof (pname)) + goto err; + + bcopy(s, pname, p - s); + pname[p - s] = '\0'; + + p = strhyphenate(p + 3); /* strlen("___") */ + + if (dt_symtab_lookup(data_sym, isym, rela.r_offset, + shdr_rel.sh_info, &fsym) != 0) + goto err; + + if (fsym.st_name > data_str->d_size) + goto err; + + assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC); + + /* + * If a NULL relocation name is passed to + * dt_probe_define(), the function name is used for the + * relocation. The relocation needs to use a mangled + * name if the symbol is locally scoped; the function + * name may need to change if we've found the global + * alias for the locally scoped symbol (we prefer + * global symbols to locals in dt_symtab_lookup()). 
+ */ + s = (char *)data_str->d_buf + fsym.st_name; + r = NULL; + + if (GELF_ST_BIND(fsym.st_info) == STB_LOCAL) { + dsym = fsym; + dsym.st_name = istr; + dsym.st_info = GELF_ST_INFO(STB_GLOBAL, + STT_FUNC); + dsym.st_other = + ELF64_ST_VISIBILITY(STV_ELIMINATE); + (void) gelf_update_sym(data_sym, isym, &dsym); + + r = (char *)data_str->d_buf + istr; + istr += 1 + sprintf(r, dt_symfmt, + dt_symprefix, objkey, s); + isym++; + assert(isym <= nsym); + + } else if (strncmp(s, dt_symprefix, + strlen(dt_symprefix)) == 0) { + r = s; + if ((s = strchr(s, '.')) == NULL) + goto err; + s++; + } + + if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) { + return (dt_link_error(dtp, elf, fd, bufs, + "no such provider %s", pname)); + } + + if ((prp = dt_probe_lookup(pvp, p)) == NULL) { + return (dt_link_error(dtp, elf, fd, bufs, + "no such probe %s", p)); + } + + assert(fsym.st_value <= rela.r_offset); + + off = rela.r_offset - fsym.st_value; + if (dt_modtext(dtp, data_tgt->d_buf, eprobe, + &rela, &off) != 0) + goto err; + + if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) { + return (dt_link_error(dtp, elf, fd, bufs, + "failed to allocate space for probe")); + } +#if !defined(sun) + /* + * Our linker doesn't understand the SUNW_IGNORE ndx and + * will try to use this relocation when we build the + * final executable. Since we are done processing this + * relocation, mark it as nonexistent and let libelf + * remove it from the file. + * If this wasn't done, we would have garbage added to + * the executable file as the symbol is going to be + * changed from UND to ABS.
+ */ + if (shdr_rel.sh_type == SHT_RELA) { + rela.r_offset = 0; + rela.r_info = 0; + rela.r_addend = 0; + (void) gelf_update_rela(data_rel, i, &rela); + } else { + GElf_Rel rel; + rel.r_offset = 0; + rel.r_info = 0; + (void) gelf_update_rel(data_rel, i, &rel); + } +#endif + + mod = 1; + (void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY); + + /* + * This symbol may already have been marked to + * be ignored by another relocation referencing + * the same symbol or if this object file has + * already been processed by an earlier link + * invocation. + */ +#if !defined(sun) +#define SHN_SUNW_IGNORE SHN_ABS +#endif + if (rsym.st_shndx != SHN_SUNW_IGNORE) { + rsym.st_shndx = SHN_SUNW_IGNORE; + (void) gelf_update_sym(data_sym, ndx, &rsym); + } + } + } + + if (mod && elf_update(elf, ELF_C_WRITE) == -1) + goto err; + + (void) elf_end(elf); + (void) close(fd); + +#if !defined(sun) + if (nsym > 0) +#endif + while ((pair = bufs) != NULL) { + bufs = pair->dlp_next; + dt_free(dtp, pair->dlp_str); + dt_free(dtp, pair->dlp_sym); + dt_free(dtp, pair); + } + + return (0); + +err: + return (dt_link_error(dtp, elf, fd, bufs, + "an error was encountered while processing %s", obj)); +} + +int +dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags, + const char *file, int objc, char *const objv[]) +{ +#if !defined(sun) + char tfile[PATH_MAX]; +#endif + char drti[PATH_MAX]; + dof_hdr_t *dof; + int fd, status, i, cur; + char *cmd, tmp; + size_t len; + int eprobes = 0, ret = 0; + +#if !defined(sun) + if (access(file, R_OK) == 0) { + fprintf(stderr, "dtrace: target object (%s) already exists. " + "Please remove the target\ndtrace: object and rebuild all " + "the source objects if you wish to run the DTrace\n" + "dtrace: linking process again\n", file); + /* + * Several build infrastructures run DTrace twice (e.g. + * postgres) and we don't want the build to fail. Return + * 0 here since this isn't really a fatal error. 
+ */ + return (0); + } +#endif + + /* + * A NULL program indicates a special use in which we just link + * together a bunch of object files specified in objv and then + * unlink(2) those object files. + */ + if (pgp == NULL) { + const char *fmt = "%s -o %s -r"; + + len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1; + + for (i = 0; i < objc; i++) + len += strlen(objv[i]) + 1; + + cmd = alloca(len); + + cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file); + + for (i = 0; i < objc; i++) + cur += snprintf(cmd + cur, len - cur, " %s", objv[i]); + + if ((status = system(cmd)) == -1) { + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to run %s: %s", dtp->dt_ld_path, + strerror(errno))); + } + + if (WIFSIGNALED(status)) { + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to link %s: %s failed due to signal %d", + file, dtp->dt_ld_path, WTERMSIG(status))); + } + + if (WEXITSTATUS(status) != 0) { + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to link %s: %s exited with status %d\n", + file, dtp->dt_ld_path, WEXITSTATUS(status))); + } + + for (i = 0; i < objc; i++) { + if (strcmp(objv[i], file) != 0) + (void) unlink(objv[i]); + } + + return (0); + } + + for (i = 0; i < objc; i++) { + if (process_obj(dtp, objv[i], &eprobes) != 0) + return (-1); /* errno is set for us */ + } + + /* + * If there are is-enabled probes then we need to force use of DOF + * version 2. + */ + if (eprobes && pgp->dp_dofversion < DOF_VERSION_2) + pgp->dp_dofversion = DOF_VERSION_2; + + if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL) + return (-1); /* errno is set for us */ + +#if defined(sun) + /* + * Create a temporary file and then unlink it if we're going to + * combine it with drti.o later. We can still refer to it in child + * processes as /dev/fd/<fd>. 
+ */ + if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) { + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to open %s: %s", file, strerror(errno))); + } +#else + snprintf(tfile, sizeof(tfile), "%s.XXXXXX", file); + if ((fd = mkstemp(tfile)) == -1) + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to create temporary file %s: %s", + tfile, strerror(errno))); +#endif + + /* + * If -xlinktype=DOF has been selected, just write out the DOF. + * Otherwise proceed to the default of generating and linking ELF. + */ + switch (dtp->dt_linktype) { + case DT_LTYP_DOF: + if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz) + ret = errno; + + if (close(fd) != 0 && ret == 0) + ret = errno; + + if (ret != 0) { + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to write %s: %s", file, strerror(ret))); + } + + return (0); + + case DT_LTYP_ELF: + break; /* fall through to the rest of dtrace_program_link() */ + + default: + return (dt_link_error(dtp, NULL, -1, NULL, + "invalid link type %u\n", dtp->dt_linktype)); + } + + +#if defined(sun) + if (!dtp->dt_lazyload) + (void) unlink(file); +#endif + + if (dtp->dt_oflags & DTRACE_O_LP64) + status = dump_elf64(dtp, dof, fd); + else + status = dump_elf32(dtp, dof, fd); + +#if defined(sun) + if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) { + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to write %s: %s", file, strerror(errno))); + } +#else + if (status != 0) + return (dt_link_error(dtp, NULL, -1, NULL, + "failed to write %s: %s", tfile, + strerror(dtrace_errno(dtp)))); +#endif + + if (!dtp->dt_lazyload) { +#if defined(sun) + const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s"; + + if (dtp->dt_oflags & DTRACE_O_LP64) { + (void) snprintf(drti, sizeof (drti), + "%s/64/drti.o", _dtrace_libdir); + } else { + (void) snprintf(drti, sizeof (drti), + "%s/drti.o", _dtrace_libdir); + } + + len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd, + drti) + 1; + + cmd = alloca(len); + + (void) 
snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti); +#else + const char *fmt = "%s -o %s -r %s %s"; + +#if defined(__amd64__) + /* + * Arches which default to 64-bit need to explicitly use + * the 32-bit library path. + */ + int use_32 = (dtp->dt_oflags & DTRACE_O_ILP32); +#else + /* + * Arches which are 32-bit only just use the normal + * library path. + */ + int use_32 = 0; +#endif + + (void) snprintf(drti, sizeof (drti), "/usr/lib%s/dtrace/drti.o", + use_32 ? "32":""); + + len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, tfile, + drti) + 1; + + cmd = alloca(len); + + (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, tfile, + drti); +#endif + if ((status = system(cmd)) == -1) { + ret = dt_link_error(dtp, NULL, -1, NULL, + "failed to run %s: %s", dtp->dt_ld_path, + strerror(errno)); + goto done; + } + + if (WIFSIGNALED(status)) { + ret = dt_link_error(dtp, NULL, -1, NULL, + "failed to link %s: %s failed due to signal %d", + file, dtp->dt_ld_path, WTERMSIG(status)); + goto done; + } + + if (WEXITSTATUS(status) != 0) { + ret = dt_link_error(dtp, NULL, -1, NULL, + "failed to link %s: %s exited with status %d\n", + file, dtp->dt_ld_path, WEXITSTATUS(status)); + goto done; + } + (void) close(fd); /* release temporary file */ + } else { + (void) close(fd); + } + +done: + dtrace_dof_destroy(dtp, dof); + +#if !defined(sun) + unlink(tfile); +#endif + return (ret); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_list.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_list.c new file mode 100644 index 0000000..32279e9 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_list.c @@ -0,0 +1,111 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Simple doubly-linked list implementation. This implementation assumes that + * each list element contains an embedded dt_list_t (previous and next + * pointers), which is typically the first member of the element struct. + * An additional dt_list_t is used to store the head (dl_next) and tail + * (dl_prev) pointers. The current head and tail list elements have their + * previous and next pointers set to NULL, respectively. 
+ */ + +#include <unistd.h> +#include <assert.h> +#include <dt_list.h> + +void +dt_list_append(dt_list_t *dlp, void *new) +{ + dt_list_t *p = dlp->dl_prev; /* p = tail list element */ + dt_list_t *q = new; /* q = new list element */ + + dlp->dl_prev = q; + q->dl_prev = p; + q->dl_next = NULL; + + if (p != NULL) { + assert(p->dl_next == NULL); + p->dl_next = q; + } else { + assert(dlp->dl_next == NULL); + dlp->dl_next = q; + } +} + +void +dt_list_prepend(dt_list_t *dlp, void *new) +{ + dt_list_t *p = new; /* p = new list element */ + dt_list_t *q = dlp->dl_next; /* q = head list element */ + + dlp->dl_next = p; + p->dl_prev = NULL; + p->dl_next = q; + + if (q != NULL) { + assert(q->dl_prev == NULL); + q->dl_prev = p; + } else { + assert(dlp->dl_prev == NULL); + dlp->dl_prev = p; + } +} + +void +dt_list_insert(dt_list_t *dlp, void *after_me, void *new) +{ + dt_list_t *p = after_me; /* p = element to insert after */ + dt_list_t *q = new; /* q = new list element */ + + if (p == NULL || p->dl_next == NULL) { /* no anchor, or p is the tail */ + dt_list_append(dlp, new); + return; + } + + q->dl_next = p->dl_next; /* q precedes p's old successor */ + q->dl_prev = p; + p->dl_next = q; + q->dl_next->dl_prev = q; +} + +void +dt_list_delete(dt_list_t *dlp, void *existing) +{ + dt_list_t *p = existing; /* p = element to unlink */ + + if (p->dl_prev != NULL) + p->dl_prev->dl_next = p->dl_next; + else + dlp->dl_next = p->dl_next; /* p was the head */ + + if (p->dl_next != NULL) + p->dl_next->dl_prev = p->dl_prev; + else + dlp->dl_prev = p->dl_prev; /* p was the tail */ +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_list.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_list.h new file mode 100644 index 0000000..348d18a --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_list.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License.
/*
 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _DT_LIST_H
#define _DT_LIST_H

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Link pair used both as the list anchor and as the member embedded in each
 * list element.
 */
typedef struct dt_list {
	struct dt_list *dl_prev;	/* previous element */
	struct dt_list *dl_next;	/* next element */
} dt_list_t;

/*
 * Step backward or forward from `elem', which is cast to a dt_list_t *.
 * The result is returned as a void *.
 */
#define dt_list_prev(elem) ((void *)(((dt_list_t *)(elem))->dl_prev))
#define dt_list_next(elem) ((void *)(((dt_list_t *)(elem))->dl_next))

/* List manipulation routines. */
extern void dt_list_append(dt_list_t *dlp, void *elem);
extern void dt_list_prepend(dt_list_t *dlp, void *elem);
extern void dt_list_insert(dt_list_t *dlp, void *after_me, void *elem);
extern void dt_list_delete(dt_list_t *dlp, void *existing);

#ifdef __cplusplus
}
#endif

#endif /* _DT_LIST_H */
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2011 by Delphix. All rights reserved. + */ + +#include <stdlib.h> +#include <strings.h> +#include <errno.h> +#include <unistd.h> +#include <assert.h> + +#include <dt_impl.h> +#include <dt_printf.h> + +static int +dt_strdata_add(dtrace_hdl_t *dtp, dtrace_recdesc_t *rec, void ***data, int *max) +{ + int maxformat, rval; + dtrace_fmtdesc_t fmt; + void *result; + + if (rec->dtrd_format == 0) + return (0); + + if (rec->dtrd_format <= *max && + (*data)[rec->dtrd_format - 1] != NULL) { + return (0); + } + + bzero(&fmt, sizeof (fmt)); + fmt.dtfd_format = rec->dtrd_format; + fmt.dtfd_string = NULL; + fmt.dtfd_length = 0; + + if (dt_ioctl(dtp, DTRACEIOC_FORMAT, &fmt) == -1) + return (dt_set_errno(dtp, errno)); + + if ((fmt.dtfd_string = dt_alloc(dtp, fmt.dtfd_length)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + if (dt_ioctl(dtp, DTRACEIOC_FORMAT, &fmt) == -1) { + rval = dt_set_errno(dtp, errno); + free(fmt.dtfd_string); + return (rval); + } + + while (rec->dtrd_format > (maxformat = *max)) { + int new_max = maxformat ? 
(maxformat << 1) : 1; + size_t nsize = new_max * sizeof (void *); + size_t osize = maxformat * sizeof (void *); + void **new_data = dt_zalloc(dtp, nsize); + + if (new_data == NULL) { + dt_free(dtp, fmt.dtfd_string); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + bcopy(*data, new_data, osize); + free(*data); + + *data = new_data; + *max = new_max; + } + + switch (rec->dtrd_action) { + case DTRACEACT_DIFEXPR: + result = fmt.dtfd_string; + break; + case DTRACEACT_PRINTA: + result = dtrace_printa_create(dtp, fmt.dtfd_string); + dt_free(dtp, fmt.dtfd_string); + break; + default: + result = dtrace_printf_create(dtp, fmt.dtfd_string); + dt_free(dtp, fmt.dtfd_string); + break; + } + + if (result == NULL) + return (-1); + + (*data)[rec->dtrd_format - 1] = result; + + return (0); +} + +static int +dt_epid_add(dtrace_hdl_t *dtp, dtrace_epid_t id) +{ + dtrace_id_t max; + int rval, i; + dtrace_eprobedesc_t *enabled, *nenabled; + dtrace_probedesc_t *probe; + + while (id >= (max = dtp->dt_maxprobe) || dtp->dt_pdesc == NULL) { + dtrace_id_t new_max = max ? 
(max << 1) : 1; + size_t nsize = new_max * sizeof (void *); + dtrace_probedesc_t **new_pdesc; + dtrace_eprobedesc_t **new_edesc; + + if ((new_pdesc = malloc(nsize)) == NULL || + (new_edesc = malloc(nsize)) == NULL) { + free(new_pdesc); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + bzero(new_pdesc, nsize); + bzero(new_edesc, nsize); + + if (dtp->dt_pdesc != NULL) { + size_t osize = max * sizeof (void *); + + bcopy(dtp->dt_pdesc, new_pdesc, osize); + free(dtp->dt_pdesc); + + bcopy(dtp->dt_edesc, new_edesc, osize); + free(dtp->dt_edesc); + } + + dtp->dt_pdesc = new_pdesc; + dtp->dt_edesc = new_edesc; + dtp->dt_maxprobe = new_max; + } + + if (dtp->dt_pdesc[id] != NULL) + return (0); + + if ((enabled = malloc(sizeof (dtrace_eprobedesc_t))) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + bzero(enabled, sizeof (dtrace_eprobedesc_t)); + enabled->dtepd_epid = id; + enabled->dtepd_nrecs = 1; + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_EPROBE, enabled) == -1) { +#else + if (dt_ioctl(dtp, DTRACEIOC_EPROBE, &enabled) == -1) { +#endif + rval = dt_set_errno(dtp, errno); + free(enabled); + return (rval); + } + + if (DTRACE_SIZEOF_EPROBEDESC(enabled) != sizeof (*enabled)) { + /* + * There must be more than one action. Allocate the + * appropriate amount of space and try again. 
+ */ + if ((nenabled = + malloc(DTRACE_SIZEOF_EPROBEDESC(enabled))) != NULL) + bcopy(enabled, nenabled, sizeof (*enabled)); + + free(enabled); + + if ((enabled = nenabled) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + +#if defined(sun) + rval = dt_ioctl(dtp, DTRACEIOC_EPROBE, enabled); +#else + rval = dt_ioctl(dtp, DTRACEIOC_EPROBE, &enabled); +#endif + + if (rval == -1) { + rval = dt_set_errno(dtp, errno); + free(enabled); + return (rval); + } + } + + if ((probe = malloc(sizeof (dtrace_probedesc_t))) == NULL) { + free(enabled); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + probe->dtpd_id = enabled->dtepd_probeid; + + if (dt_ioctl(dtp, DTRACEIOC_PROBES, probe) == -1) { + rval = dt_set_errno(dtp, errno); + goto err; + } + + for (i = 0; i < enabled->dtepd_nrecs; i++) { + dtrace_recdesc_t *rec = &enabled->dtepd_rec[i]; + + if (DTRACEACT_ISPRINTFLIKE(rec->dtrd_action)) { + if (dt_strdata_add(dtp, rec, &dtp->dt_formats, + &dtp->dt_maxformat) != 0) { + rval = -1; + goto err; + } + } else if (rec->dtrd_action == DTRACEACT_DIFEXPR) { + if (dt_strdata_add(dtp, rec, + (void ***)&dtp->dt_strdata, + &dtp->dt_maxstrdata) != 0) { + rval = -1; + goto err; + } + } + + } + + dtp->dt_pdesc[id] = probe; + dtp->dt_edesc[id] = enabled; + + return (0); + +err: + /* + * If we failed, free our allocated probes. Note that if we failed + * while allocating formats, we aren't going to free formats that + * we have already allocated. This is okay; these formats are + * hanging off of dt_formats and will therefore not be leaked. 
+ */ + free(enabled); + free(probe); + return (rval); +} + +int +dt_epid_lookup(dtrace_hdl_t *dtp, dtrace_epid_t epid, + dtrace_eprobedesc_t **epdp, dtrace_probedesc_t **pdp) +{ + int rval; + + if (epid >= dtp->dt_maxprobe || dtp->dt_pdesc[epid] == NULL) { + if ((rval = dt_epid_add(dtp, epid)) != 0) + return (rval); + } + + assert(epid < dtp->dt_maxprobe); + assert(dtp->dt_edesc[epid] != NULL); + assert(dtp->dt_pdesc[epid] != NULL); + *epdp = dtp->dt_edesc[epid]; + *pdp = dtp->dt_pdesc[epid]; + + return (0); +} + +void +dt_epid_destroy(dtrace_hdl_t *dtp) +{ + size_t i; + + assert((dtp->dt_pdesc != NULL && dtp->dt_edesc != NULL && + dtp->dt_maxprobe > 0) || (dtp->dt_pdesc == NULL && + dtp->dt_edesc == NULL && dtp->dt_maxprobe == 0)); + + if (dtp->dt_pdesc == NULL) + return; + + for (i = 0; i < dtp->dt_maxprobe; i++) { + if (dtp->dt_edesc[i] == NULL) { + assert(dtp->dt_pdesc[i] == NULL); + continue; + } + + assert(dtp->dt_pdesc[i] != NULL); + free(dtp->dt_edesc[i]); + free(dtp->dt_pdesc[i]); + } + + free(dtp->dt_pdesc); + dtp->dt_pdesc = NULL; + + free(dtp->dt_edesc); + dtp->dt_edesc = NULL; + dtp->dt_maxprobe = 0; +} + +void * +dt_format_lookup(dtrace_hdl_t *dtp, int format) +{ + if (format == 0 || format > dtp->dt_maxformat) + return (NULL); + + if (dtp->dt_formats == NULL) + return (NULL); + + return (dtp->dt_formats[format - 1]); +} + +void +dt_format_destroy(dtrace_hdl_t *dtp) +{ + int i; + + for (i = 0; i < dtp->dt_maxformat; i++) { + if (dtp->dt_formats[i] != NULL) + dt_printf_destroy(dtp->dt_formats[i]); + } + + free(dtp->dt_formats); + dtp->dt_formats = NULL; +} + +static int +dt_aggid_add(dtrace_hdl_t *dtp, dtrace_aggid_t id) +{ + dtrace_id_t max; + dtrace_epid_t epid; + int rval; + + while (id >= (max = dtp->dt_maxagg) || dtp->dt_aggdesc == NULL) { + dtrace_id_t new_max = max ? 
(max << 1) : 1; + size_t nsize = new_max * sizeof (void *); + dtrace_aggdesc_t **new_aggdesc; + + if ((new_aggdesc = malloc(nsize)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + bzero(new_aggdesc, nsize); + + if (dtp->dt_aggdesc != NULL) { + bcopy(dtp->dt_aggdesc, new_aggdesc, + max * sizeof (void *)); + free(dtp->dt_aggdesc); + } + + dtp->dt_aggdesc = new_aggdesc; + dtp->dt_maxagg = new_max; + } + + if (dtp->dt_aggdesc[id] == NULL) { + dtrace_aggdesc_t *agg, *nagg; + + if ((agg = malloc(sizeof (dtrace_aggdesc_t))) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + bzero(agg, sizeof (dtrace_aggdesc_t)); + agg->dtagd_id = id; + agg->dtagd_nrecs = 1; + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_AGGDESC, agg) == -1) { +#else + if (dt_ioctl(dtp, DTRACEIOC_AGGDESC, &agg) == -1) { +#endif + rval = dt_set_errno(dtp, errno); + free(agg); + return (rval); + } + + if (DTRACE_SIZEOF_AGGDESC(agg) != sizeof (*agg)) { + /* + * There must be more than one action. Allocate the + * appropriate amount of space and try again. + */ + if ((nagg = malloc(DTRACE_SIZEOF_AGGDESC(agg))) != NULL) + bcopy(agg, nagg, sizeof (*agg)); + + free(agg); + + if ((agg = nagg) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + +#if defined(sun) + rval = dt_ioctl(dtp, DTRACEIOC_AGGDESC, agg); +#else + rval = dt_ioctl(dtp, DTRACEIOC_AGGDESC, &agg); +#endif + + if (rval == -1) { + rval = dt_set_errno(dtp, errno); + free(agg); + return (rval); + } + } + + /* + * If we have a uarg, it's a pointer to the compiler-generated + * statement; we'll use this value to get the name and + * compiler-generated variable ID for the aggregation. If + * we're grabbing an anonymous enabling, this pointer value + * is obviously meaningless -- and in this case, we can't + * provide the compiler-generated aggregation information. 
+ */ + if (dtp->dt_options[DTRACEOPT_GRABANON] == DTRACEOPT_UNSET && + agg->dtagd_rec[0].dtrd_uarg != 0) { + dtrace_stmtdesc_t *sdp; + dt_ident_t *aid; + + sdp = (dtrace_stmtdesc_t *)(uintptr_t) + agg->dtagd_rec[0].dtrd_uarg; + aid = sdp->dtsd_aggdata; + agg->dtagd_name = aid->di_name; + agg->dtagd_varid = aid->di_id; + } else { + agg->dtagd_varid = DTRACE_AGGVARIDNONE; + } + + if ((epid = agg->dtagd_epid) >= dtp->dt_maxprobe || + dtp->dt_pdesc[epid] == NULL) { + if ((rval = dt_epid_add(dtp, epid)) != 0) { + free(agg); + return (rval); + } + } + + dtp->dt_aggdesc[id] = agg; + } + + return (0); +} + +int +dt_aggid_lookup(dtrace_hdl_t *dtp, dtrace_aggid_t aggid, + dtrace_aggdesc_t **adp) +{ + int rval; + + if (aggid >= dtp->dt_maxagg || dtp->dt_aggdesc[aggid] == NULL) { + if ((rval = dt_aggid_add(dtp, aggid)) != 0) + return (rval); + } + + assert(aggid < dtp->dt_maxagg); + assert(dtp->dt_aggdesc[aggid] != NULL); + *adp = dtp->dt_aggdesc[aggid]; + + return (0); +} + +void +dt_aggid_destroy(dtrace_hdl_t *dtp) +{ + size_t i; + + assert((dtp->dt_aggdesc != NULL && dtp->dt_maxagg != 0) || + (dtp->dt_aggdesc == NULL && dtp->dt_maxagg == 0)); + + if (dtp->dt_aggdesc == NULL) + return; + + for (i = 0; i < dtp->dt_maxagg; i++) { + if (dtp->dt_aggdesc[i] != NULL) + free(dtp->dt_aggdesc[i]); + } + + free(dtp->dt_aggdesc); + dtp->dt_aggdesc = NULL; + dtp->dt_maxagg = 0; +} + +const char * +dt_strdata_lookup(dtrace_hdl_t *dtp, int idx) +{ + if (idx == 0 || idx > dtp->dt_maxstrdata) + return (NULL); + + if (dtp->dt_strdata == NULL) + return (NULL); + + return (dtp->dt_strdata[idx - 1]); +} + +void +dt_strdata_destroy(dtrace_hdl_t *dtp) +{ + int i; + + for (i = 0; i < dtp->dt_maxstrdata; i++) { + free(dtp->dt_strdata[i]); + } + + free(dtp->dt_strdata); + dtp->dt_strdata = NULL; +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c new file mode 100644 index 0000000..9dd52b5 --- /dev/null +++ 
b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c @@ -0,0 +1,1735 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. 
+ */ + +#include <sys/types.h> +#if defined(sun) +#include <sys/modctl.h> +#include <sys/kobj.h> +#include <sys/kobj_impl.h> +#include <sys/sysmacros.h> +#include <sys/elf.h> +#include <sys/task.h> +#else +#include <sys/param.h> +#include <sys/linker.h> +#include <sys/stat.h> +#endif + +#include <unistd.h> +#if defined(sun) +#include <project.h> +#endif +#include <strings.h> +#include <stdlib.h> +#include <libelf.h> +#include <limits.h> +#include <assert.h> +#include <errno.h> +#include <dirent.h> +#if !defined(sun) +#include <fcntl.h> +#include <libproc_compat.h> +#endif + +#include <dt_strtab.h> +#include <dt_module.h> +#include <dt_impl.h> + +static const char *dt_module_strtab; /* active strtab for qsort callbacks */ + +static void +dt_module_symhash_insert(dt_module_t *dmp, const char *name, uint_t id) +{ + dt_sym_t *dsp = &dmp->dm_symchains[dmp->dm_symfree]; + uint_t h; + + assert(dmp->dm_symfree < dmp->dm_nsymelems + 1); + + dsp->ds_symid = id; + h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets; + dsp->ds_next = dmp->dm_symbuckets[h]; + dmp->dm_symbuckets[h] = dmp->dm_symfree++; +} + +static uint_t +dt_module_syminit32(dt_module_t *dmp) +{ +#if STT_NUM != (STT_TLS + 1) +#error "STT_NUM has grown. 
update dt_module_syminit32()" +#endif + + Elf32_Sym *sym = dmp->dm_symtab.cts_data; + const char *base = dmp->dm_strtab.cts_data; + size_t ss_size = dmp->dm_strtab.cts_size; + uint_t i, n = dmp->dm_nsymelems; + uint_t asrsv = 0; + +#if defined(__FreeBSD__) + GElf_Ehdr ehdr; + int is_elf_obj; + + gelf_getehdr(dmp->dm_elf, &ehdr); + is_elf_obj = (ehdr.e_type == ET_REL); +#endif + + for (i = 0; i < n; i++, sym++) { + const char *name = base + sym->st_name; + uchar_t type = ELF32_ST_TYPE(sym->st_info); + + if (type >= STT_NUM || type == STT_SECTION) + continue; /* skip sections and unknown types */ + + if (sym->st_name == 0 || sym->st_name >= ss_size) + continue; /* skip null or invalid names */ + + if (sym->st_value != 0 && + (ELF32_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size)) { + asrsv++; /* reserve space in the address map */ + +#if defined(__FreeBSD__) + sym->st_value += (Elf_Addr) dmp->dm_reloc_offset; + if (is_elf_obj && sym->st_shndx != SHN_UNDEF && + sym->st_shndx < ehdr.e_shnum) + sym->st_value += + dmp->dm_sec_offsets[sym->st_shndx]; +#endif + } + + dt_module_symhash_insert(dmp, name, i); + } + + return (asrsv); +} + +static uint_t +dt_module_syminit64(dt_module_t *dmp) +{ +#if STT_NUM != (STT_TLS + 1) +#error "STT_NUM has grown. 
update dt_module_syminit64()" +#endif + + Elf64_Sym *sym = dmp->dm_symtab.cts_data; + const char *base = dmp->dm_strtab.cts_data; + size_t ss_size = dmp->dm_strtab.cts_size; + uint_t i, n = dmp->dm_nsymelems; + uint_t asrsv = 0; + +#if defined(__FreeBSD__) + GElf_Ehdr ehdr; + int is_elf_obj; + + gelf_getehdr(dmp->dm_elf, &ehdr); + is_elf_obj = (ehdr.e_type == ET_REL); +#endif + + for (i = 0; i < n; i++, sym++) { + const char *name = base + sym->st_name; + uchar_t type = ELF64_ST_TYPE(sym->st_info); + + if (type >= STT_NUM || type == STT_SECTION) + continue; /* skip sections and unknown types */ + + if (sym->st_name == 0 || sym->st_name >= ss_size) + continue; /* skip null or invalid names */ + + if (sym->st_value != 0 && + (ELF64_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size)) { + asrsv++; /* reserve space in the address map */ +#if defined(__FreeBSD__) + sym->st_value += (Elf_Addr) dmp->dm_reloc_offset; + if (is_elf_obj && sym->st_shndx != SHN_UNDEF && + sym->st_shndx < ehdr.e_shnum) + sym->st_value += + dmp->dm_sec_offsets[sym->st_shndx]; +#endif + } + + dt_module_symhash_insert(dmp, name, i); + } + + return (asrsv); +} + +/* + * Sort comparison function for 32-bit symbol address-to-name lookups. We sort + * symbols by value. If values are equal, we prefer the symbol that is + * non-zero sized, typed, not weak, or lexically first, in that order. + */ +static int +dt_module_symcomp32(const void *lp, const void *rp) +{ + Elf32_Sym *lhs = *((Elf32_Sym **)lp); + Elf32_Sym *rhs = *((Elf32_Sym **)rp); + + if (lhs->st_value != rhs->st_value) + return (lhs->st_value > rhs->st_value ? 1 : -1); + + if ((lhs->st_size == 0) != (rhs->st_size == 0)) + return (lhs->st_size == 0 ? 1 : -1); + + if ((ELF32_ST_TYPE(lhs->st_info) == STT_NOTYPE) != + (ELF32_ST_TYPE(rhs->st_info) == STT_NOTYPE)) + return (ELF32_ST_TYPE(lhs->st_info) == STT_NOTYPE ? 
1 : -1); + + if ((ELF32_ST_BIND(lhs->st_info) == STB_WEAK) != + (ELF32_ST_BIND(rhs->st_info) == STB_WEAK)) + return (ELF32_ST_BIND(lhs->st_info) == STB_WEAK ? 1 : -1); + + return (strcmp(dt_module_strtab + lhs->st_name, + dt_module_strtab + rhs->st_name)); +} + +/* + * Sort comparison function for 64-bit symbol address-to-name lookups. We sort + * symbols by value. If values are equal, we prefer the symbol that is + * non-zero sized, typed, not weak, or lexically first, in that order. + */ +static int +dt_module_symcomp64(const void *lp, const void *rp) +{ + Elf64_Sym *lhs = *((Elf64_Sym **)lp); + Elf64_Sym *rhs = *((Elf64_Sym **)rp); + + if (lhs->st_value != rhs->st_value) + return (lhs->st_value > rhs->st_value ? 1 : -1); + + if ((lhs->st_size == 0) != (rhs->st_size == 0)) + return (lhs->st_size == 0 ? 1 : -1); + + if ((ELF64_ST_TYPE(lhs->st_info) == STT_NOTYPE) != + (ELF64_ST_TYPE(rhs->st_info) == STT_NOTYPE)) + return (ELF64_ST_TYPE(lhs->st_info) == STT_NOTYPE ? 1 : -1); + + if ((ELF64_ST_BIND(lhs->st_info) == STB_WEAK) != + (ELF64_ST_BIND(rhs->st_info) == STB_WEAK)) + return (ELF64_ST_BIND(lhs->st_info) == STB_WEAK ? 
1 : -1); + + return (strcmp(dt_module_strtab + lhs->st_name, + dt_module_strtab + rhs->st_name)); +} + +static void +dt_module_symsort32(dt_module_t *dmp) +{ + Elf32_Sym *symtab = (Elf32_Sym *)dmp->dm_symtab.cts_data; + Elf32_Sym **sympp = (Elf32_Sym **)dmp->dm_asmap; + const dt_sym_t *dsp = dmp->dm_symchains + 1; + uint_t i, n = dmp->dm_symfree; + + for (i = 1; i < n; i++, dsp++) { + Elf32_Sym *sym = symtab + dsp->ds_symid; + if (sym->st_value != 0 && + (ELF32_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size)) + *sympp++ = sym; + } + + dmp->dm_aslen = (uint_t)(sympp - (Elf32_Sym **)dmp->dm_asmap); + assert(dmp->dm_aslen <= dmp->dm_asrsv); + + dt_module_strtab = dmp->dm_strtab.cts_data; + qsort(dmp->dm_asmap, dmp->dm_aslen, + sizeof (Elf32_Sym *), dt_module_symcomp32); + dt_module_strtab = NULL; +} + +static void +dt_module_symsort64(dt_module_t *dmp) +{ + Elf64_Sym *symtab = (Elf64_Sym *)dmp->dm_symtab.cts_data; + Elf64_Sym **sympp = (Elf64_Sym **)dmp->dm_asmap; + const dt_sym_t *dsp = dmp->dm_symchains + 1; + uint_t i, n = dmp->dm_symfree; + + for (i = 1; i < n; i++, dsp++) { + Elf64_Sym *sym = symtab + dsp->ds_symid; + if (sym->st_value != 0 && + (ELF64_ST_BIND(sym->st_info) != STB_LOCAL || sym->st_size)) + *sympp++ = sym; + } + + dmp->dm_aslen = (uint_t)(sympp - (Elf64_Sym **)dmp->dm_asmap); + assert(dmp->dm_aslen <= dmp->dm_asrsv); + + dt_module_strtab = dmp->dm_strtab.cts_data; + qsort(dmp->dm_asmap, dmp->dm_aslen, + sizeof (Elf64_Sym *), dt_module_symcomp64); + dt_module_strtab = NULL; +} + +static GElf_Sym * +dt_module_symgelf32(const Elf32_Sym *src, GElf_Sym *dst) +{ + if (dst != NULL) { + dst->st_name = src->st_name; + dst->st_info = src->st_info; + dst->st_other = src->st_other; + dst->st_shndx = src->st_shndx; + dst->st_value = src->st_value; + dst->st_size = src->st_size; + } + + return (dst); +} + +static GElf_Sym * +dt_module_symgelf64(const Elf64_Sym *src, GElf_Sym *dst) +{ + if (dst != NULL) + bcopy(src, dst, sizeof (GElf_Sym)); + + return (dst); 
+} + +static GElf_Sym * +dt_module_symname32(dt_module_t *dmp, const char *name, + GElf_Sym *symp, uint_t *idp) +{ + const Elf32_Sym *symtab = dmp->dm_symtab.cts_data; + const char *strtab = dmp->dm_strtab.cts_data; + + const Elf32_Sym *sym; + const dt_sym_t *dsp; + uint_t i, h; + + if (dmp->dm_nsymelems == 0) + return (NULL); + + h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets; + + for (i = dmp->dm_symbuckets[h]; i != 0; i = dsp->ds_next) { + dsp = &dmp->dm_symchains[i]; + sym = symtab + dsp->ds_symid; + + if (strcmp(name, strtab + sym->st_name) == 0) { + if (idp != NULL) + *idp = dsp->ds_symid; + return (dt_module_symgelf32(sym, symp)); + } + } + + return (NULL); +} + +static GElf_Sym * +dt_module_symname64(dt_module_t *dmp, const char *name, + GElf_Sym *symp, uint_t *idp) +{ + const Elf64_Sym *symtab = dmp->dm_symtab.cts_data; + const char *strtab = dmp->dm_strtab.cts_data; + + const Elf64_Sym *sym; + const dt_sym_t *dsp; + uint_t i, h; + + if (dmp->dm_nsymelems == 0) + return (NULL); + + h = dt_strtab_hash(name, NULL) % dmp->dm_nsymbuckets; + + for (i = dmp->dm_symbuckets[h]; i != 0; i = dsp->ds_next) { + dsp = &dmp->dm_symchains[i]; + sym = symtab + dsp->ds_symid; + + if (strcmp(name, strtab + sym->st_name) == 0) { + if (idp != NULL) + *idp = dsp->ds_symid; + return (dt_module_symgelf64(sym, symp)); + } + } + + return (NULL); +} + +static GElf_Sym * +dt_module_symaddr32(dt_module_t *dmp, GElf_Addr addr, + GElf_Sym *symp, uint_t *idp) +{ + const Elf32_Sym **asmap = (const Elf32_Sym **)dmp->dm_asmap; + const Elf32_Sym *symtab = dmp->dm_symtab.cts_data; + const Elf32_Sym *sym; + + uint_t i, mid, lo = 0, hi = dmp->dm_aslen - 1; + Elf32_Addr v; + + if (dmp->dm_aslen == 0) + return (NULL); + + while (hi - lo > 1) { + mid = (lo + hi) / 2; + if (addr >= asmap[mid]->st_value) + lo = mid; + else + hi = mid; + } + + i = addr < asmap[hi]->st_value ? 
lo : hi; + sym = asmap[i]; + v = sym->st_value; + + /* + * If the previous entry has the same value, improve our choice. The + * order of equal-valued symbols is determined by the comparison func. + */ + while (i-- != 0 && asmap[i]->st_value == v) + sym = asmap[i]; + + if (addr - sym->st_value < MAX(sym->st_size, 1)) { + if (idp != NULL) + *idp = (uint_t)(sym - symtab); + return (dt_module_symgelf32(sym, symp)); + } + + return (NULL); +} + +static GElf_Sym * +dt_module_symaddr64(dt_module_t *dmp, GElf_Addr addr, + GElf_Sym *symp, uint_t *idp) +{ + const Elf64_Sym **asmap = (const Elf64_Sym **)dmp->dm_asmap; + const Elf64_Sym *symtab = dmp->dm_symtab.cts_data; + const Elf64_Sym *sym; + + uint_t i, mid, lo = 0, hi = dmp->dm_aslen - 1; + Elf64_Addr v; + + if (dmp->dm_aslen == 0) + return (NULL); + + while (hi - lo > 1) { + mid = (lo + hi) / 2; + if (addr >= asmap[mid]->st_value) + lo = mid; + else + hi = mid; + } + + i = addr < asmap[hi]->st_value ? lo : hi; + sym = asmap[i]; + v = sym->st_value; + + /* + * If the previous entry has the same value, improve our choice. The + * order of equal-valued symbols is determined by the comparison func. 
+ */ + while (i-- != 0 && asmap[i]->st_value == v) + sym = asmap[i]; + + if (addr - sym->st_value < MAX(sym->st_size, 1)) { + if (idp != NULL) + *idp = (uint_t)(sym - symtab); + return (dt_module_symgelf64(sym, symp)); + } + + return (NULL); +} + +static const dt_modops_t dt_modops_32 = { + dt_module_syminit32, + dt_module_symsort32, + dt_module_symname32, + dt_module_symaddr32 +}; + +static const dt_modops_t dt_modops_64 = { + dt_module_syminit64, + dt_module_symsort64, + dt_module_symname64, + dt_module_symaddr64 +}; + +dt_module_t * +dt_module_create(dtrace_hdl_t *dtp, const char *name) +{ + long pid; + char *eptr; + dt_ident_t *idp; + uint_t h = dt_strtab_hash(name, NULL) % dtp->dt_modbuckets; + dt_module_t *dmp; + + for (dmp = dtp->dt_mods[h]; dmp != NULL; dmp = dmp->dm_next) { + if (strcmp(dmp->dm_name, name) == 0) + return (dmp); + } + + if ((dmp = malloc(sizeof (dt_module_t))) == NULL) + return (NULL); /* caller must handle allocation failure */ + + bzero(dmp, sizeof (dt_module_t)); + (void) strlcpy(dmp->dm_name, name, sizeof (dmp->dm_name)); + dt_list_append(&dtp->dt_modlist, dmp); + dmp->dm_next = dtp->dt_mods[h]; + dtp->dt_mods[h] = dmp; + dtp->dt_nmods++; + + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) + dmp->dm_ops = &dt_modops_64; + else + dmp->dm_ops = &dt_modops_32; + + /* + * Modules for userland processes are special. They always refer to a + * specific process and have a copy of their CTF data from a specific + * instant in time. Any dt_module_t that begins with 'pid' is a module + * for a specific process, much like how any probe description that + * begins with 'pid' is special. pid123 refers to process 123. A module + * that is just 'pid' refers specifically to pid$target. This is + * generally done as D does not currently allow for macros to be + * evaluated when working with types. 
+ */ + if (strncmp(dmp->dm_name, "pid", 3) == 0) { + errno = 0; + if (dmp->dm_name[3] == '\0') { + idp = dt_idhash_lookup(dtp->dt_macros, "target"); + if (idp != NULL && idp->di_id != 0) + dmp->dm_pid = idp->di_id; + } else { + pid = strtol(dmp->dm_name + 3, &eptr, 10); + if (errno == 0 && *eptr == '\0') + dmp->dm_pid = (pid_t)pid; + else + dt_dprintf("encountered malformed pid " + "module: %s\n", dmp->dm_name); + } + } + + return (dmp); +} + +dt_module_t * +dt_module_lookup_by_name(dtrace_hdl_t *dtp, const char *name) +{ + uint_t h = dt_strtab_hash(name, NULL) % dtp->dt_modbuckets; + dt_module_t *dmp; + + for (dmp = dtp->dt_mods[h]; dmp != NULL; dmp = dmp->dm_next) { + if (strcmp(dmp->dm_name, name) == 0) + return (dmp); + } + + return (NULL); +} + +/*ARGSUSED*/ +dt_module_t * +dt_module_lookup_by_ctf(dtrace_hdl_t *dtp, ctf_file_t *ctfp) +{ + return (ctfp ? ctf_getspecific(ctfp) : NULL); +} + +static int +dt_module_load_sect(dtrace_hdl_t *dtp, dt_module_t *dmp, ctf_sect_t *ctsp) +{ + const char *s; + size_t shstrs; + GElf_Shdr sh; + Elf_Data *dp; + Elf_Scn *sp; + + if (elf_getshdrstrndx(dmp->dm_elf, &shstrs) == -1) + return (dt_set_errno(dtp, EDT_NOTLOADED)); + + for (sp = NULL; (sp = elf_nextscn(dmp->dm_elf, sp)) != NULL; ) { + if (gelf_getshdr(sp, &sh) == NULL || sh.sh_type == SHT_NULL || + (s = elf_strptr(dmp->dm_elf, shstrs, sh.sh_name)) == NULL) + continue; /* skip any malformed sections */ + + if (sh.sh_type == ctsp->cts_type && + sh.sh_entsize == ctsp->cts_entsize && + strcmp(s, ctsp->cts_name) == 0) + break; /* section matches specification */ + } + + /* + * If the section isn't found, return success but leave cts_data set + * to NULL and cts_size set to zero for our caller. 
+ */ + if (sp == NULL || (dp = elf_getdata(sp, NULL)) == NULL) + return (0); + +#if defined(sun) + ctsp->cts_data = dp->d_buf; +#else + if ((ctsp->cts_data = malloc(dp->d_size)) == NULL) + return (0); + memcpy(ctsp->cts_data, dp->d_buf, dp->d_size); +#endif + ctsp->cts_size = dp->d_size; + + dt_dprintf("loaded %s [%s] (%lu bytes)\n", + dmp->dm_name, ctsp->cts_name, (ulong_t)ctsp->cts_size); + + return (0); +} + +typedef struct dt_module_cb_arg { + struct ps_prochandle *dpa_proc; + dtrace_hdl_t *dpa_dtp; + dt_module_t *dpa_dmp; + uint_t dpa_count; +} dt_module_cb_arg_t; + +/* ARGSUSED */ +static int +dt_module_load_proc_count(void *arg, const prmap_t *prmap, const char *obj) +{ + ctf_file_t *fp; + dt_module_cb_arg_t *dcp = arg; + + /* Try to grab a ctf container if it exists */ + fp = Pname_to_ctf(dcp->dpa_proc, obj); + if (fp != NULL) + dcp->dpa_count++; + return (0); +} + +/* ARGSUSED */ +static int +dt_module_load_proc_build(void *arg, const prmap_t *prmap, const char *obj) +{ + ctf_file_t *fp; + char buf[MAXPATHLEN], *p; + dt_module_cb_arg_t *dcp = arg; + int count = dcp->dpa_count; + Lmid_t lmid; + + fp = Pname_to_ctf(dcp->dpa_proc, obj); + if (fp == NULL) + return (0); + fp = ctf_dup(fp); + if (fp == NULL) + return (0); + dcp->dpa_dmp->dm_libctfp[count] = fp; + /* + * While it'd be nice to simply use objname here, because of our prior + * actions we'll always get a resolved object name to its on disk file. + * Like the pid provider, we need to tell a bit of a lie here. The type + * that the user thinks of is in terms of the libraries they requested, + * eg. libc.so.1, they don't care about the fact that it's + * libc_hwcap.so.1. + */ + (void) Pobjname(dcp->dpa_proc, prmap->pr_vaddr, buf, sizeof (buf)); + if ((p = strrchr(buf, '/')) == NULL) + p = buf; + else + p++; + + /* + * If for some reason we can't find a link map id for this module, which + * would be really quite weird. We instead just say the link map id is + * zero. 
+ */ + if (Plmid(dcp->dpa_proc, prmap->pr_vaddr, &lmid) != 0) + lmid = 0; + + if (lmid == 0) + dcp->dpa_dmp->dm_libctfn[count] = strdup(p); + else + (void) asprintf(&dcp->dpa_dmp->dm_libctfn[count], + "LM%x`%s", lmid, p); + if (dcp->dpa_dmp->dm_libctfn[count] == NULL) + return (1); + ctf_setspecific(fp, dcp->dpa_dmp); + dcp->dpa_count++; + return (0); +} + +/* + * We've been asked to load data that belongs to another process. As such we're + * going to pgrab it at this instant, load everything that we might ever care + * about, and then drive on. The reason for this is that the process that we're + * interested in might be changing. As long as we have grabbed it, then this + * can't be a problem for us. + * + * For now, we're actually going to punt on most things and just try to get CTF + * data, nothing else. Basically this is only useful as a source of type + * information, we can't go and do the stacktrace lookups, etc. + */ +static int +dt_module_load_proc(dtrace_hdl_t *dtp, dt_module_t *dmp) +{ + struct ps_prochandle *p; + dt_module_cb_arg_t arg; + + /* + * Note that on success we do not release this hold. We must hold this + * for our life time. 
+ */ + p = dt_proc_grab(dtp, dmp->dm_pid, 0, PGRAB_RDONLY | PGRAB_FORCE); + if (p == NULL) { + dt_dprintf("failed to grab pid: %d\n", (int)dmp->dm_pid); + return (dt_set_errno(dtp, EDT_CANTLOAD)); + } + dt_proc_lock(dtp, p); + + arg.dpa_proc = p; + arg.dpa_dtp = dtp; + arg.dpa_dmp = dmp; + arg.dpa_count = 0; + if (Pobject_iter_resolved(p, dt_module_load_proc_count, &arg) != 0) { + dt_dprintf("failed to iterate objects\n"); + dt_proc_release(dtp, p); + return (dt_set_errno(dtp, EDT_CANTLOAD)); + } + + if (arg.dpa_count == 0) { + dt_dprintf("no ctf data present\n"); + dt_proc_unlock(dtp, p); + dt_proc_release(dtp, p); + return (dt_set_errno(dtp, EDT_CANTLOAD)); + } + + dmp->dm_libctfp = malloc(sizeof (ctf_file_t *) * arg.dpa_count); + if (dmp->dm_libctfp == NULL) { + dt_proc_unlock(dtp, p); + dt_proc_release(dtp, p); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + bzero(dmp->dm_libctfp, sizeof (ctf_file_t *) * arg.dpa_count); + + dmp->dm_libctfn = malloc(sizeof (char *) * arg.dpa_count); + if (dmp->dm_libctfn == NULL) { + free(dmp->dm_libctfp); + dt_proc_unlock(dtp, p); + dt_proc_release(dtp, p); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + bzero(dmp->dm_libctfn, sizeof (char *) * arg.dpa_count); + + dmp->dm_nctflibs = arg.dpa_count; + + arg.dpa_count = 0; + if (Pobject_iter_resolved(p, dt_module_load_proc_build, &arg) != 0) { + dt_proc_unlock(dtp, p); + dt_module_unload(dtp, dmp); + dt_proc_release(dtp, p); + return (dt_set_errno(dtp, EDT_CANTLOAD)); + } + assert(arg.dpa_count == dmp->dm_nctflibs); + dt_dprintf("loaded %d ctf modules for pid %d\n", arg.dpa_count, + (int)dmp->dm_pid); + + dt_proc_unlock(dtp, p); + dt_proc_release(dtp, p); + dmp->dm_flags |= DT_DM_LOADED; + + return (0); +} + +int +dt_module_load(dtrace_hdl_t *dtp, dt_module_t *dmp) +{ + if (dmp->dm_flags & DT_DM_LOADED) + return (0); /* module is already loaded */ + + if (dmp->dm_pid != 0) + return (dt_module_load_proc(dtp, dmp)); + + dmp->dm_ctdata.cts_name = ".SUNW_ctf"; + dmp->dm_ctdata.cts_type 
= SHT_PROGBITS; + dmp->dm_ctdata.cts_flags = 0; + dmp->dm_ctdata.cts_data = NULL; + dmp->dm_ctdata.cts_size = 0; + dmp->dm_ctdata.cts_entsize = 0; + dmp->dm_ctdata.cts_offset = 0; + + dmp->dm_symtab.cts_name = ".symtab"; + dmp->dm_symtab.cts_type = SHT_SYMTAB; + dmp->dm_symtab.cts_flags = 0; + dmp->dm_symtab.cts_data = NULL; + dmp->dm_symtab.cts_size = 0; + dmp->dm_symtab.cts_entsize = dmp->dm_ops == &dt_modops_64 ? + sizeof (Elf64_Sym) : sizeof (Elf32_Sym); + dmp->dm_symtab.cts_offset = 0; + + dmp->dm_strtab.cts_name = ".strtab"; + dmp->dm_strtab.cts_type = SHT_STRTAB; + dmp->dm_strtab.cts_flags = 0; + dmp->dm_strtab.cts_data = NULL; + dmp->dm_strtab.cts_size = 0; + dmp->dm_strtab.cts_entsize = 0; + dmp->dm_strtab.cts_offset = 0; + + /* + * Attempt to load the module's CTF section, symbol table section, and + * string table section. Note that modules may not contain CTF data: + * this will result in a successful load_sect but data of size zero. + * We will then fail if dt_module_getctf() is called, as shown below. + */ + if (dt_module_load_sect(dtp, dmp, &dmp->dm_ctdata) == -1 || + dt_module_load_sect(dtp, dmp, &dmp->dm_symtab) == -1 || + dt_module_load_sect(dtp, dmp, &dmp->dm_strtab) == -1) { + dt_module_unload(dtp, dmp); + return (-1); /* dt_errno is set for us */ + } + + /* + * Allocate the hash chains and hash buckets for symbol name lookup. + * This is relatively simple since the symbol table is of fixed size + * and is known in advance. We allocate one extra element since we + * use element indices instead of pointers and zero is our sentinel. 
+ */ + dmp->dm_nsymelems = + dmp->dm_symtab.cts_size / dmp->dm_symtab.cts_entsize; + + dmp->dm_nsymbuckets = _dtrace_strbuckets; + dmp->dm_symfree = 1; /* first free element is index 1 */ + + dmp->dm_symbuckets = malloc(sizeof (uint_t) * dmp->dm_nsymbuckets); + dmp->dm_symchains = malloc(sizeof (dt_sym_t) * dmp->dm_nsymelems + 1); + + if (dmp->dm_symbuckets == NULL || dmp->dm_symchains == NULL) { + dt_module_unload(dtp, dmp); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + bzero(dmp->dm_symbuckets, sizeof (uint_t) * dmp->dm_nsymbuckets); + bzero(dmp->dm_symchains, sizeof (dt_sym_t) * dmp->dm_nsymelems + 1); + + /* + * Iterate over the symbol table data buffer and insert each symbol + * name into the name hash if the name and type are valid. Then + * allocate the address map, fill it in, and sort it. + */ + dmp->dm_asrsv = dmp->dm_ops->do_syminit(dmp); + + dt_dprintf("hashed %s [%s] (%u symbols)\n", + dmp->dm_name, dmp->dm_symtab.cts_name, dmp->dm_symfree - 1); + + if ((dmp->dm_asmap = malloc(sizeof (void *) * dmp->dm_asrsv)) == NULL) { + dt_module_unload(dtp, dmp); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + dmp->dm_ops->do_symsort(dmp); + + dt_dprintf("sorted %s [%s] (%u symbols)\n", + dmp->dm_name, dmp->dm_symtab.cts_name, dmp->dm_aslen); + + dmp->dm_flags |= DT_DM_LOADED; + return (0); +} + +int +dt_module_hasctf(dtrace_hdl_t *dtp, dt_module_t *dmp) +{ + if (dmp->dm_pid != 0 && dmp->dm_nctflibs > 0) + return (1); + return (dt_module_getctf(dtp, dmp) != NULL); +} + +ctf_file_t * +dt_module_getctf(dtrace_hdl_t *dtp, dt_module_t *dmp) +{ + const char *parent; + dt_module_t *pmp; + ctf_file_t *pfp; + int model; + + if (dmp->dm_ctfp != NULL || dt_module_load(dtp, dmp) != 0) + return (dmp->dm_ctfp); + + if (dmp->dm_ops == &dt_modops_64) + model = CTF_MODEL_LP64; + else + model = CTF_MODEL_ILP32; + + /* + * If the data model of the module does not match our program data + * model, then do not permit CTF from this module to be opened and + * returned to the 
compiler. If we support mixed data models in the + * future for combined kernel/user tracing, this can be removed. + */ + if (dtp->dt_conf.dtc_ctfmodel != model) { + (void) dt_set_errno(dtp, EDT_DATAMODEL); + return (NULL); + } + + if (dmp->dm_ctdata.cts_size == 0) { + (void) dt_set_errno(dtp, EDT_NOCTF); + return (NULL); + } + + dmp->dm_ctfp = ctf_bufopen(&dmp->dm_ctdata, + &dmp->dm_symtab, &dmp->dm_strtab, &dtp->dt_ctferr); + + if (dmp->dm_ctfp == NULL) { + (void) dt_set_errno(dtp, EDT_CTF); + return (NULL); + } + + (void) ctf_setmodel(dmp->dm_ctfp, model); + ctf_setspecific(dmp->dm_ctfp, dmp); + + if ((parent = ctf_parent_name(dmp->dm_ctfp)) != NULL) { + if ((pmp = dt_module_create(dtp, parent)) == NULL || + (pfp = dt_module_getctf(dtp, pmp)) == NULL) { + if (pmp == NULL) + (void) dt_set_errno(dtp, EDT_NOMEM); + goto err; + } + + if (ctf_import(dmp->dm_ctfp, pfp) == CTF_ERR) { + dtp->dt_ctferr = ctf_errno(dmp->dm_ctfp); + (void) dt_set_errno(dtp, EDT_CTF); + goto err; + } + } + + dt_dprintf("loaded CTF container for %s (%p)\n", + dmp->dm_name, (void *)dmp->dm_ctfp); + + return (dmp->dm_ctfp); + +err: + ctf_close(dmp->dm_ctfp); + dmp->dm_ctfp = NULL; + return (NULL); +} + +/*ARGSUSED*/ +void +dt_module_unload(dtrace_hdl_t *dtp, dt_module_t *dmp) +{ + int i; + + ctf_close(dmp->dm_ctfp); + dmp->dm_ctfp = NULL; + +#if !defined(sun) + if (dmp->dm_ctdata.cts_data != NULL) { + free(dmp->dm_ctdata.cts_data); + } + if (dmp->dm_symtab.cts_data != NULL) { + free(dmp->dm_symtab.cts_data); + } + if (dmp->dm_strtab.cts_data != NULL) { + free(dmp->dm_strtab.cts_data); + } +#endif + + if (dmp->dm_libctfp != NULL) { + for (i = 0; i < dmp->dm_nctflibs; i++) { + ctf_close(dmp->dm_libctfp[i]); + free(dmp->dm_libctfn[i]); + } + free(dmp->dm_libctfp); + free(dmp->dm_libctfn); + dmp->dm_libctfp = NULL; + dmp->dm_nctflibs = 0; + } + + bzero(&dmp->dm_ctdata, sizeof (ctf_sect_t)); + bzero(&dmp->dm_symtab, sizeof (ctf_sect_t)); + bzero(&dmp->dm_strtab, sizeof (ctf_sect_t)); + + if 
(dmp->dm_symbuckets != NULL) { + free(dmp->dm_symbuckets); + dmp->dm_symbuckets = NULL; + } + + if (dmp->dm_symchains != NULL) { + free(dmp->dm_symchains); + dmp->dm_symchains = NULL; + } + + if (dmp->dm_asmap != NULL) { + free(dmp->dm_asmap); + dmp->dm_asmap = NULL; + } +#if defined(__FreeBSD__) + if (dmp->dm_sec_offsets != NULL) { + free(dmp->dm_sec_offsets); + dmp->dm_sec_offsets = NULL; + } +#endif + dmp->dm_symfree = 0; + dmp->dm_nsymbuckets = 0; + dmp->dm_nsymelems = 0; + dmp->dm_asrsv = 0; + dmp->dm_aslen = 0; + + dmp->dm_text_va = 0; + dmp->dm_text_size = 0; + dmp->dm_data_va = 0; + dmp->dm_data_size = 0; + dmp->dm_bss_va = 0; + dmp->dm_bss_size = 0; + + if (dmp->dm_extern != NULL) { + dt_idhash_destroy(dmp->dm_extern); + dmp->dm_extern = NULL; + } + + (void) elf_end(dmp->dm_elf); + dmp->dm_elf = NULL; + + dmp->dm_pid = 0; + + dmp->dm_flags &= ~DT_DM_LOADED; +} + +void +dt_module_destroy(dtrace_hdl_t *dtp, dt_module_t *dmp) +{ + uint_t h = dt_strtab_hash(dmp->dm_name, NULL) % dtp->dt_modbuckets; + dt_module_t **dmpp = &dtp->dt_mods[h]; + + dt_list_delete(&dtp->dt_modlist, dmp); + assert(dtp->dt_nmods != 0); + dtp->dt_nmods--; + + /* + * Now remove this module from its hash chain. We expect to always + * find the module on its hash chain, so in this loop we assert that + * we don't run off the end of the list. + */ + while (*dmpp != dmp) { + dmpp = &((*dmpp)->dm_next); + assert(*dmpp != NULL); + } + + *dmpp = dmp->dm_next; + + dt_module_unload(dtp, dmp); + free(dmp); +} + +/* + * Insert a new external symbol reference into the specified module. The new + * symbol will be marked as undefined and is assigned a symbol index beyond + * any existing cached symbols from this module. We use the ident's di_data + * field to store a pointer to a copy of the dtrace_syminfo_t for this symbol. 
+ */ +dt_ident_t * +dt_module_extern(dtrace_hdl_t *dtp, dt_module_t *dmp, + const char *name, const dtrace_typeinfo_t *tip) +{ + dtrace_syminfo_t *sip; + dt_ident_t *idp; + uint_t id; + + if (dmp->dm_extern == NULL && (dmp->dm_extern = dt_idhash_create( + "extern", NULL, dmp->dm_nsymelems, UINT_MAX)) == NULL) { + (void) dt_set_errno(dtp, EDT_NOMEM); + return (NULL); + } + + if (dt_idhash_nextid(dmp->dm_extern, &id) == -1) { + (void) dt_set_errno(dtp, EDT_SYMOFLOW); + return (NULL); + } + + if ((sip = malloc(sizeof (dtrace_syminfo_t))) == NULL) { + (void) dt_set_errno(dtp, EDT_NOMEM); + return (NULL); + } + + idp = dt_idhash_insert(dmp->dm_extern, name, DT_IDENT_SYMBOL, 0, id, + _dtrace_symattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen); + + if (idp == NULL) { + (void) dt_set_errno(dtp, EDT_NOMEM); + free(sip); + return (NULL); + } + + sip->dts_object = dmp->dm_name; + sip->dts_name = idp->di_name; + sip->dts_id = idp->di_id; + + idp->di_data = sip; + idp->di_ctfp = tip->dtt_ctfp; + idp->di_type = tip->dtt_type; + + return (idp); +} + +const char * +dt_module_modelname(dt_module_t *dmp) +{ + if (dmp->dm_ops == &dt_modops_64) + return ("64-bit"); + else + return ("32-bit"); +} + +/* ARGSUSED */ +int +dt_module_getlibid(dtrace_hdl_t *dtp, dt_module_t *dmp, const ctf_file_t *fp) +{ + int i; + + for (i = 0; i < dmp->dm_nctflibs; i++) { + if (dmp->dm_libctfp[i] == fp) + return (i); + } + + return (-1); +} + +/* ARGSUSED */ +ctf_file_t * +dt_module_getctflib(dtrace_hdl_t *dtp, dt_module_t *dmp, const char *name) +{ + int i; + + for (i = 0; i < dmp->dm_nctflibs; i++) { + if (strcmp(dmp->dm_libctfn[i], name) == 0) + return (dmp->dm_libctfp[i]); + } + + return (NULL); +} + +/* + * Update our module cache by adding an entry for the specified module 'name'. + * We create the dt_module_t and populate it using /system/object/<name>/. + * + * On FreeBSD, the module name is passed as the full module file name, + * including the path. 
+ */ +static void +#if defined(sun) +dt_module_update(dtrace_hdl_t *dtp, const char *name) +#else +dt_module_update(dtrace_hdl_t *dtp, struct kld_file_stat *k_stat) +#endif +{ + char fname[MAXPATHLEN]; + struct stat64 st; + int fd, err, bits; + + dt_module_t *dmp; + const char *s; + size_t shstrs; + GElf_Shdr sh; + Elf_Data *dp; + Elf_Scn *sp; + +#if defined(sun) + (void) snprintf(fname, sizeof (fname), + "%s/%s/object", OBJFS_ROOT, name); +#else + GElf_Ehdr ehdr; + GElf_Phdr ph; + char name[MAXPATHLEN]; + uintptr_t mapbase, alignmask; + int i = 0; + int is_elf_obj; + + (void) strlcpy(name, k_stat->name, sizeof(name)); + (void) strlcpy(fname, k_stat->pathname, sizeof(fname)); +#endif + + if ((fd = open(fname, O_RDONLY)) == -1 || fstat64(fd, &st) == -1 || + (dmp = dt_module_create(dtp, name)) == NULL) { + dt_dprintf("failed to open %s: %s\n", fname, strerror(errno)); + (void) close(fd); + return; + } + + /* + * Since the module can unload out from under us (and /system/object + * will return ENOENT), tell libelf to cook the entire file now and + * then close the underlying file descriptor immediately. If this + * succeeds, we know that we can continue safely using dmp->dm_elf. 
+ */ + dmp->dm_elf = elf_begin(fd, ELF_C_READ, NULL); + err = elf_cntl(dmp->dm_elf, ELF_C_FDREAD); + (void) close(fd); + + if (dmp->dm_elf == NULL || err == -1 || + elf_getshdrstrndx(dmp->dm_elf, &shstrs) == -1) { + dt_dprintf("failed to load %s: %s\n", + fname, elf_errmsg(elf_errno())); + dt_module_destroy(dtp, dmp); + return; + } + + switch (gelf_getclass(dmp->dm_elf)) { + case ELFCLASS32: + dmp->dm_ops = &dt_modops_32; + bits = 32; + break; + case ELFCLASS64: + dmp->dm_ops = &dt_modops_64; + bits = 64; + break; + default: + dt_dprintf("failed to load %s: unknown ELF class\n", fname); + dt_module_destroy(dtp, dmp); + return; + } +#if defined(__FreeBSD__) + mapbase = (uintptr_t)k_stat->address; + gelf_getehdr(dmp->dm_elf, &ehdr); + is_elf_obj = (ehdr.e_type == ET_REL); + if (is_elf_obj) { + dmp->dm_sec_offsets = + malloc(ehdr.e_shnum * sizeof(*dmp->dm_sec_offsets)); + if (dmp->dm_sec_offsets == NULL) { + dt_dprintf("failed to allocate memory\n"); + dt_module_destroy(dtp, dmp); + return; + } + } +#endif + /* + * Iterate over the section headers locating various sections of + * interest and use their attributes to flesh out the dt_module_t. 
+ */ + for (sp = NULL; (sp = elf_nextscn(dmp->dm_elf, sp)) != NULL; ) { + if (gelf_getshdr(sp, &sh) == NULL || sh.sh_type == SHT_NULL || + (s = elf_strptr(dmp->dm_elf, shstrs, sh.sh_name)) == NULL) + continue; /* skip any malformed sections */ +#if defined(__FreeBSD__) + if (sh.sh_size == 0) + continue; + if (sh.sh_type == SHT_PROGBITS || sh.sh_type == SHT_NOBITS) { + alignmask = sh.sh_addralign - 1; + mapbase += alignmask; + mapbase &= ~alignmask; + sh.sh_addr = mapbase; + if (is_elf_obj) + dmp->dm_sec_offsets[elf_ndxscn(sp)] = sh.sh_addr; + mapbase += sh.sh_size; + } +#endif + if (strcmp(s, ".text") == 0) { + dmp->dm_text_size = sh.sh_size; + dmp->dm_text_va = sh.sh_addr; + } else if (strcmp(s, ".data") == 0) { + dmp->dm_data_size = sh.sh_size; + dmp->dm_data_va = sh.sh_addr; + } else if (strcmp(s, ".bss") == 0) { + dmp->dm_bss_size = sh.sh_size; + dmp->dm_bss_va = sh.sh_addr; + } else if (strcmp(s, ".info") == 0 && + (dp = elf_getdata(sp, NULL)) != NULL) { + bcopy(dp->d_buf, &dmp->dm_info, + MIN(sh.sh_size, sizeof (dmp->dm_info))); + } else if (strcmp(s, ".filename") == 0 && + (dp = elf_getdata(sp, NULL)) != NULL) { + (void) strlcpy(dmp->dm_file, + dp->d_buf, sizeof (dmp->dm_file)); + } + } + + dmp->dm_flags |= DT_DM_KERNEL; +#if defined(sun) + dmp->dm_modid = (int)OBJFS_MODID(st.st_ino); +#else + /* + * Include .rodata and special sections into .text. + * This depends on default section layout produced by GNU ld + * for ELF objects and libraries: + * [Text][R/O data][R/W data][Dynamic][BSS][Non loadable] + */ + dmp->dm_text_size = dmp->dm_data_va - dmp->dm_text_va; +#if defined(__i386__) + /* + * Find the first load section and figure out the relocation + * offset for the symbols. The kernel module will not need + * relocation, but the kernel linker modules will. 
+ */ + for (i = 0; gelf_getphdr(dmp->dm_elf, i, &ph) != NULL; i++) { + if (ph.p_type == PT_LOAD) { + dmp->dm_reloc_offset = k_stat->address - ph.p_vaddr; + break; + } + } +#endif +#endif + + if (dmp->dm_info.objfs_info_primary) + dmp->dm_flags |= DT_DM_PRIMARY; + + dt_dprintf("opened %d-bit module %s (%s) [%d]\n", + bits, dmp->dm_name, dmp->dm_file, dmp->dm_modid); +} + +/* + * Unload all the loaded modules and then refresh the module cache with the + * latest list of loaded modules and their address ranges. + */ +void +dtrace_update(dtrace_hdl_t *dtp) +{ + dt_module_t *dmp; + DIR *dirp; +#if defined(__FreeBSD__) + int fileid; +#endif + + for (dmp = dt_list_next(&dtp->dt_modlist); + dmp != NULL; dmp = dt_list_next(dmp)) + dt_module_unload(dtp, dmp); + +#if defined(sun) + /* + * Open /system/object and attempt to create a libdtrace module for + * each kernel module that is loaded on the current system. + */ + if (!(dtp->dt_oflags & DTRACE_O_NOSYS) && + (dirp = opendir(OBJFS_ROOT)) != NULL) { + struct dirent *dp; + + while ((dp = readdir(dirp)) != NULL) { + if (dp->d_name[0] != '.') + dt_module_update(dtp, dp->d_name); + } + + (void) closedir(dirp); + } +#elif defined(__FreeBSD__) + /* + * Use FreeBSD's kernel loader interface to discover what kernel + * modules are loaded and create a libdtrace module for each one. + */ + for (fileid = kldnext(0); fileid > 0; fileid = kldnext(fileid)) { + struct kld_file_stat k_stat; + k_stat.version = sizeof(k_stat); + if (kldstat(fileid, &k_stat) == 0) + dt_module_update(dtp, &k_stat); + } +#endif + + /* + * Look up all the macro identifiers and set di_id to the latest value. + * This code collaborates with dt_lex.l on the use of di_id. We will + * need to implement something fancier if we need to support non-ints. 
+ */ + dt_idhash_lookup(dtp->dt_macros, "egid")->di_id = getegid(); + dt_idhash_lookup(dtp->dt_macros, "euid")->di_id = geteuid(); + dt_idhash_lookup(dtp->dt_macros, "gid")->di_id = getgid(); + dt_idhash_lookup(dtp->dt_macros, "pid")->di_id = getpid(); + dt_idhash_lookup(dtp->dt_macros, "pgid")->di_id = getpgid(0); + dt_idhash_lookup(dtp->dt_macros, "ppid")->di_id = getppid(); +#if defined(sun) + dt_idhash_lookup(dtp->dt_macros, "projid")->di_id = getprojid(); +#endif + dt_idhash_lookup(dtp->dt_macros, "sid")->di_id = getsid(0); +#if defined(sun) + dt_idhash_lookup(dtp->dt_macros, "taskid")->di_id = gettaskid(); +#endif + dt_idhash_lookup(dtp->dt_macros, "uid")->di_id = getuid(); + + /* + * Cache the pointers to the modules representing the base executable + * and the run-time linker in the dtrace client handle. Note that on + * x86 krtld is folded into unix, so if we don't find it, use unix + * instead. + */ + dtp->dt_exec = dt_module_lookup_by_name(dtp, "genunix"); + dtp->dt_rtld = dt_module_lookup_by_name(dtp, "krtld"); + if (dtp->dt_rtld == NULL) + dtp->dt_rtld = dt_module_lookup_by_name(dtp, "unix"); + + /* + * If this is the first time we are initializing the module list, + * remove the module for genunix from the module list and then move it + * to the front of the module list. We do this so that type and symbol + * queries encounter genunix and thereby optimize for the common case + * in dtrace_lookup_by_name() and dtrace_lookup_by_type(), below. 
+ */ + if (dtp->dt_exec != NULL && + dtp->dt_cdefs == NULL && dtp->dt_ddefs == NULL) { + dt_list_delete(&dtp->dt_modlist, dtp->dt_exec); + dt_list_prepend(&dtp->dt_modlist, dtp->dt_exec); + } +} + +static dt_module_t * +dt_module_from_object(dtrace_hdl_t *dtp, const char *object) +{ + int err = EDT_NOMOD; + dt_module_t *dmp; + + switch ((uintptr_t)object) { + case (uintptr_t)DTRACE_OBJ_EXEC: + dmp = dtp->dt_exec; + break; + case (uintptr_t)DTRACE_OBJ_RTLD: + dmp = dtp->dt_rtld; + break; + case (uintptr_t)DTRACE_OBJ_CDEFS: + dmp = dtp->dt_cdefs; + break; + case (uintptr_t)DTRACE_OBJ_DDEFS: + dmp = dtp->dt_ddefs; + break; + default: + dmp = dt_module_create(dtp, object); + err = EDT_NOMEM; + } + + if (dmp == NULL) + (void) dt_set_errno(dtp, err); + + return (dmp); +} + +/* + * Exported interface to look up a symbol by name. We return the GElf_Sym and + * complete symbol information for the matching symbol. + */ +int +dtrace_lookup_by_name(dtrace_hdl_t *dtp, const char *object, const char *name, + GElf_Sym *symp, dtrace_syminfo_t *sip) +{ + dt_module_t *dmp; + dt_ident_t *idp; + uint_t n, id; + GElf_Sym sym; + + uint_t mask = 0; /* mask of dt_module flags to match */ + uint_t bits = 0; /* flag bits that must be present */ + + if (object != DTRACE_OBJ_EVERY && + object != DTRACE_OBJ_KMODS && + object != DTRACE_OBJ_UMODS) { + if ((dmp = dt_module_from_object(dtp, object)) == NULL) + return (-1); /* dt_errno is set for us */ + + if (dt_module_load(dtp, dmp) == -1) + return (-1); /* dt_errno is set for us */ + n = 1; + + } else { + if (object == DTRACE_OBJ_KMODS) + mask = bits = DT_DM_KERNEL; + else if (object == DTRACE_OBJ_UMODS) + mask = DT_DM_KERNEL; + + dmp = dt_list_next(&dtp->dt_modlist); + n = dtp->dt_nmods; + } + + if (symp == NULL) + symp = &sym; + + for (; n > 0; n--, dmp = dt_list_next(dmp)) { + if ((dmp->dm_flags & mask) != bits) + continue; /* failed to match required attributes */ + + if (dt_module_load(dtp, dmp) == -1) + continue; /* failed to load symbol 
table */ + + if (dmp->dm_ops->do_symname(dmp, name, symp, &id) != NULL) { + if (sip != NULL) { + sip->dts_object = dmp->dm_name; + sip->dts_name = (const char *) + dmp->dm_strtab.cts_data + symp->st_name; + sip->dts_id = id; + } + return (0); + } + + if (dmp->dm_extern != NULL && + (idp = dt_idhash_lookup(dmp->dm_extern, name)) != NULL) { + if (symp != &sym) { + symp->st_name = (uintptr_t)idp->di_name; + symp->st_info = + GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); + symp->st_other = 0; + symp->st_shndx = SHN_UNDEF; + symp->st_value = 0; + symp->st_size = + ctf_type_size(idp->di_ctfp, idp->di_type); + } + + if (sip != NULL) { + sip->dts_object = dmp->dm_name; + sip->dts_name = idp->di_name; + sip->dts_id = idp->di_id; + } + + return (0); + } + } + + return (dt_set_errno(dtp, EDT_NOSYM)); +} + +/* + * Exported interface to look up a symbol by address. We return the GElf_Sym + * and complete symbol information for the matching symbol. + */ +int +dtrace_lookup_by_addr(dtrace_hdl_t *dtp, GElf_Addr addr, + GElf_Sym *symp, dtrace_syminfo_t *sip) +{ + dt_module_t *dmp; + uint_t id; + const dtrace_vector_t *v = dtp->dt_vector; + + if (v != NULL) + return (v->dtv_lookup_by_addr(dtp->dt_varg, addr, symp, sip)); + + for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL; + dmp = dt_list_next(dmp)) { + if (addr - dmp->dm_text_va < dmp->dm_text_size || + addr - dmp->dm_data_va < dmp->dm_data_size || + addr - dmp->dm_bss_va < dmp->dm_bss_size) + break; + } + + if (dmp == NULL) + return (dt_set_errno(dtp, EDT_NOSYMADDR)); + + if (dt_module_load(dtp, dmp) == -1) + return (-1); /* dt_errno is set for us */ + + if (symp != NULL) { + if (dmp->dm_ops->do_symaddr(dmp, addr, symp, &id) == NULL) + return (dt_set_errno(dtp, EDT_NOSYMADDR)); + } + + if (sip != NULL) { + sip->dts_object = dmp->dm_name; + + if (symp != NULL) { + sip->dts_name = (const char *) + dmp->dm_strtab.cts_data + symp->st_name; + sip->dts_id = id; + } else { + sip->dts_name = NULL; + sip->dts_id = 0; + } + } + + return (0); 
+} + +int +dtrace_lookup_by_type(dtrace_hdl_t *dtp, const char *object, const char *name, + dtrace_typeinfo_t *tip) +{ + dtrace_typeinfo_t ti; + dt_module_t *dmp; + int found = 0; + ctf_id_t id; + uint_t n, i; + int justone; + ctf_file_t *fp; + char *buf, *p, *q; + + uint_t mask = 0; /* mask of dt_module flags to match */ + uint_t bits = 0; /* flag bits that must be present */ + + if (object != DTRACE_OBJ_EVERY && + object != DTRACE_OBJ_KMODS && + object != DTRACE_OBJ_UMODS) { + if ((dmp = dt_module_from_object(dtp, object)) == NULL) + return (-1); /* dt_errno is set for us */ + + if (dt_module_load(dtp, dmp) == -1) + return (-1); /* dt_errno is set for us */ + n = 1; + justone = 1; + } else { + if (object == DTRACE_OBJ_KMODS) + mask = bits = DT_DM_KERNEL; + else if (object == DTRACE_OBJ_UMODS) + mask = DT_DM_KERNEL; + + dmp = dt_list_next(&dtp->dt_modlist); + n = dtp->dt_nmods; + justone = 0; + } + + if (tip == NULL) + tip = &ti; + + for (; n > 0; n--, dmp = dt_list_next(dmp)) { + if ((dmp->dm_flags & mask) != bits) + continue; /* failed to match required attributes */ + + /* + * If we can't load the CTF container, continue on to the next + * module. If our search was scoped to only one module then + * return immediately leaving dt_errno unmodified. + */ + if (dt_module_hasctf(dtp, dmp) == 0) { + if (justone) + return (-1); + continue; + } + + /* + * Look up the type in the module's CTF container. If our + * match is a forward declaration tag, save this choice in + * 'tip' and keep going in the hope that we will locate the + * underlying structure definition. Otherwise just return. 
+ */ + if (dmp->dm_pid == 0) { + id = ctf_lookup_by_name(dmp->dm_ctfp, name); + fp = dmp->dm_ctfp; + } else { + if ((p = strchr(name, '`')) != NULL) { + buf = strdup(name); + if (buf == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + p = strchr(buf, '`'); + if ((q = strchr(p + 1, '`')) != NULL) + p = q; + *p = '\0'; + fp = dt_module_getctflib(dtp, dmp, buf); + if (fp == NULL || (id = ctf_lookup_by_name(fp, + p + 1)) == CTF_ERR) + id = CTF_ERR; + free(buf); + } else { + for (i = 0; i < dmp->dm_nctflibs; i++) { + fp = dmp->dm_libctfp[i]; + id = ctf_lookup_by_name(fp, name); + if (id != CTF_ERR) + break; + } + } + } + if (id != CTF_ERR) { + tip->dtt_object = dmp->dm_name; + tip->dtt_ctfp = fp; + tip->dtt_type = id; + if (ctf_type_kind(fp, ctf_type_resolve(fp, id)) != + CTF_K_FORWARD) + return (0); + + found++; + } + } + + if (found == 0) + return (dt_set_errno(dtp, EDT_NOTYPE)); + + return (0); +} + +int +dtrace_symbol_type(dtrace_hdl_t *dtp, const GElf_Sym *symp, + const dtrace_syminfo_t *sip, dtrace_typeinfo_t *tip) +{ + dt_module_t *dmp; + + tip->dtt_object = NULL; + tip->dtt_ctfp = NULL; + tip->dtt_type = CTF_ERR; + tip->dtt_flags = 0; + + if ((dmp = dt_module_lookup_by_name(dtp, sip->dts_object)) == NULL) + return (dt_set_errno(dtp, EDT_NOMOD)); + + if (symp->st_shndx == SHN_UNDEF && dmp->dm_extern != NULL) { + dt_ident_t *idp = + dt_idhash_lookup(dmp->dm_extern, sip->dts_name); + + if (idp == NULL) + return (dt_set_errno(dtp, EDT_NOSYM)); + + tip->dtt_ctfp = idp->di_ctfp; + tip->dtt_type = idp->di_type; + + } else if (GELF_ST_TYPE(symp->st_info) != STT_FUNC) { + if (dt_module_getctf(dtp, dmp) == NULL) + return (-1); /* errno is set for us */ + + tip->dtt_ctfp = dmp->dm_ctfp; + tip->dtt_type = ctf_lookup_by_symbol(dmp->dm_ctfp, sip->dts_id); + + if (tip->dtt_type == CTF_ERR) { + dtp->dt_ctferr = ctf_errno(tip->dtt_ctfp); + return (dt_set_errno(dtp, EDT_CTF)); + } + + } else { + tip->dtt_ctfp = DT_FPTR_CTFP(dtp); + tip->dtt_type = DT_FPTR_TYPE(dtp); + } + + 
tip->dtt_object = dmp->dm_name; + return (0); +} + +static dtrace_objinfo_t * +dt_module_info(const dt_module_t *dmp, dtrace_objinfo_t *dto) +{ + dto->dto_name = dmp->dm_name; + dto->dto_file = dmp->dm_file; + dto->dto_id = dmp->dm_modid; + dto->dto_flags = 0; + + if (dmp->dm_flags & DT_DM_KERNEL) + dto->dto_flags |= DTRACE_OBJ_F_KERNEL; + if (dmp->dm_flags & DT_DM_PRIMARY) + dto->dto_flags |= DTRACE_OBJ_F_PRIMARY; + + dto->dto_text_va = dmp->dm_text_va; + dto->dto_text_size = dmp->dm_text_size; + dto->dto_data_va = dmp->dm_data_va; + dto->dto_data_size = dmp->dm_data_size; + dto->dto_bss_va = dmp->dm_bss_va; + dto->dto_bss_size = dmp->dm_bss_size; + + return (dto); +} + +int +dtrace_object_iter(dtrace_hdl_t *dtp, dtrace_obj_f *func, void *data) +{ + const dt_module_t *dmp = dt_list_next(&dtp->dt_modlist); + dtrace_objinfo_t dto; + int rv; + + for (; dmp != NULL; dmp = dt_list_next(dmp)) { + if ((rv = (*func)(dtp, dt_module_info(dmp, &dto), data)) != 0) + return (rv); + } + + return (0); +} + +int +dtrace_object_info(dtrace_hdl_t *dtp, const char *object, dtrace_objinfo_t *dto) +{ + dt_module_t *dmp; + + if (object == DTRACE_OBJ_EVERY || object == DTRACE_OBJ_KMODS || + object == DTRACE_OBJ_UMODS || dto == NULL) + return (dt_set_errno(dtp, EINVAL)); + + if ((dmp = dt_module_from_object(dtp, object)) == NULL) + return (-1); /* dt_errno is set for us */ + + if (dt_module_load(dtp, dmp) == -1) + return (-1); /* dt_errno is set for us */ + + (void) dt_module_info(dmp, dto); + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.h new file mode 100644 index 0000000..d103e02 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.h @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#ifndef _DT_MODULE_H +#define _DT_MODULE_H + +#include <dt_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern dt_module_t *dt_module_create(dtrace_hdl_t *, const char *); +extern int dt_module_load(dtrace_hdl_t *, dt_module_t *); +extern void dt_module_unload(dtrace_hdl_t *, dt_module_t *); +extern void dt_module_destroy(dtrace_hdl_t *, dt_module_t *); + +extern dt_module_t *dt_module_lookup_by_name(dtrace_hdl_t *, const char *); +extern dt_module_t *dt_module_lookup_by_ctf(dtrace_hdl_t *, ctf_file_t *); + +extern int dt_module_hasctf(dtrace_hdl_t *, dt_module_t *); +extern ctf_file_t *dt_module_getctf(dtrace_hdl_t *, dt_module_t *); +extern dt_ident_t *dt_module_extern(dtrace_hdl_t *, dt_module_t *, + const char *, const dtrace_typeinfo_t *); + +extern const char *dt_module_modelname(dt_module_t *); +extern int dt_module_getlibid(dtrace_hdl_t *, dt_module_t *, + const ctf_file_t *); +extern ctf_file_t *dt_module_getctflib(dtrace_hdl_t *, dt_module_t *, + const char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_MODULE_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c 
b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c new file mode 100644 index 0000000..0eb2a2c --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c @@ -0,0 +1,1701 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + +#include <sys/types.h> +#if defined(sun) +#include <sys/modctl.h> +#include <sys/systeminfo.h> +#else +/* FreeBSD */ +#include <sys/param.h> +#include <sys/module.h> +#include <sys/linker.h> +#endif +#include <sys/resource.h> + +#include <libelf.h> +#include <strings.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <limits.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <assert.h> + +#define _POSIX_PTHREAD_SEMANTICS +#include <dirent.h> +#undef _POSIX_PTHREAD_SEMANTICS + +#include <dt_impl.h> +#include <dt_program.h> +#include <dt_module.h> +#include <dt_printf.h> +#include <dt_string.h> +#include <dt_provider.h> +#if !defined(sun) +#include <sys/sysctl.h> +#include <string.h> +#endif +#if defined(__i386__) +#include <ieeefp.h> +#endif + +/* + * Stability and versioning definitions. These #defines are used in the tables + * of identifiers below to fill in the attribute and version fields associated + * with each identifier. The DT_ATTR_* macros are a convenience to permit more + * concise declarations of common attributes such as Stable/Stable/Common. The + * DT_VERS_* macros declare the encoded integer values of all versions used so + * far. DT_VERS_LATEST must correspond to the latest version value among all + * versions exported by the D compiler. DT_VERS_STRING must be an ASCII string + * that contains DT_VERS_LATEST within it along with any suffixes (e.g. Beta). + * You must update DT_VERS_LATEST and DT_VERS_STRING when adding a new version, + * and then add the new version to the _dtrace_versions[] array declared below. + * Refer to the Solaris Dynamic Tracing Guide Stability and Versioning chapters + * respectively for an explanation of these DTrace features and their values. + * + * NOTE: Although the DTrace versioning scheme supports the labeling and + * introduction of incompatible changes (e.g. 
dropping an interface in a + * major release), the libdtrace code does not currently support this. + * All versions are assumed to strictly inherit from one another. If + * we ever need to provide divergent interfaces, this will need work. + */ +#define DT_ATTR_STABCMN { DTRACE_STABILITY_STABLE, \ + DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON } + +#define DT_ATTR_EVOLCMN { DTRACE_STABILITY_EVOLVING, \ + DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON \ +} + +/* + * The version number should be increased for every customer visible release + * of DTrace. The major number should be incremented when a fundamental + * change has been made that would affect all consumers, and would reflect + * sweeping changes to DTrace or the D language. The minor number should be + * incremented when a change is introduced that could break scripts that had + * previously worked; for example, adding a new built-in variable could break + * a script which was already using that identifier. The micro number should + * be changed when introducing functionality changes or major bug fixes that + * do not affect backward compatibility -- this is merely to make capabilities + * easily determined from the version number. Minor bugs do not require any + * modification to the version number. 
+ */ +#define DT_VERS_1_0 DT_VERSION_NUMBER(1, 0, 0) +#define DT_VERS_1_1 DT_VERSION_NUMBER(1, 1, 0) +#define DT_VERS_1_2 DT_VERSION_NUMBER(1, 2, 0) +#define DT_VERS_1_2_1 DT_VERSION_NUMBER(1, 2, 1) +#define DT_VERS_1_2_2 DT_VERSION_NUMBER(1, 2, 2) +#define DT_VERS_1_3 DT_VERSION_NUMBER(1, 3, 0) +#define DT_VERS_1_4 DT_VERSION_NUMBER(1, 4, 0) +#define DT_VERS_1_4_1 DT_VERSION_NUMBER(1, 4, 1) +#define DT_VERS_1_5 DT_VERSION_NUMBER(1, 5, 0) +#define DT_VERS_1_6 DT_VERSION_NUMBER(1, 6, 0) +#define DT_VERS_1_6_1 DT_VERSION_NUMBER(1, 6, 1) +#define DT_VERS_1_6_2 DT_VERSION_NUMBER(1, 6, 2) +#define DT_VERS_1_6_3 DT_VERSION_NUMBER(1, 6, 3) +#define DT_VERS_1_7 DT_VERSION_NUMBER(1, 7, 0) +#define DT_VERS_1_7_1 DT_VERSION_NUMBER(1, 7, 1) +#define DT_VERS_1_8 DT_VERSION_NUMBER(1, 8, 0) +#define DT_VERS_1_8_1 DT_VERSION_NUMBER(1, 8, 1) +#define DT_VERS_1_9 DT_VERSION_NUMBER(1, 9, 0) +#define DT_VERS_1_9_1 DT_VERSION_NUMBER(1, 9, 1) +#define DT_VERS_1_10 DT_VERSION_NUMBER(1, 10, 0) +#define DT_VERS_1_11 DT_VERSION_NUMBER(1, 11, 0) +#define DT_VERS_1_12 DT_VERSION_NUMBER(1, 12, 0) +#define DT_VERS_1_12_1 DT_VERSION_NUMBER(1, 12, 1) +#define DT_VERS_LATEST DT_VERS_1_12_1 +#define DT_VERS_STRING "Sun D 1.12.1" + +const dt_version_t _dtrace_versions[] = { + DT_VERS_1_0, /* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */ + DT_VERS_1_1, /* D API 1.1.0 Solaris Express 6/05 */ + DT_VERS_1_2, /* D API 1.2.0 Solaris 10 Update 1 */ + DT_VERS_1_2_1, /* D API 1.2.1 Solaris Express 4/06 */ + DT_VERS_1_2_2, /* D API 1.2.2 Solaris Express 6/06 */ + DT_VERS_1_3, /* D API 1.3 Solaris Express 10/06 */ + DT_VERS_1_4, /* D API 1.4 Solaris Express 2/07 */ + DT_VERS_1_4_1, /* D API 1.4.1 Solaris Express 4/07 */ + DT_VERS_1_5, /* D API 1.5 Solaris Express 7/07 */ + DT_VERS_1_6, /* D API 1.6 */ + DT_VERS_1_6_1, /* D API 1.6.1 */ + DT_VERS_1_6_2, /* D API 1.6.2 */ + DT_VERS_1_6_3, /* D API 1.6.3 */ + DT_VERS_1_7, /* D API 1.7 */ + DT_VERS_1_7_1, /* D API 1.7.1 */ + DT_VERS_1_8, /* D API 1.8 */ + 
DT_VERS_1_8_1, /* D API 1.8.1 */ + DT_VERS_1_9, /* D API 1.9 */ + DT_VERS_1_9_1, /* D API 1.9.1 */ + DT_VERS_1_10, /* D API 1.10 */ + DT_VERS_1_11, /* D API 1.11 */ + DT_VERS_1_12, /* D API 1.12 */ + DT_VERS_1_12_1, /* D API 1.12.1 */ + 0 +}; + +/* + * Global variables that are formatted on FreeBSD based on the kernel file name. + */ +#if !defined(sun) +static char curthread_str[MAXPATHLEN]; +static char intmtx_str[MAXPATHLEN]; +static char threadmtx_str[MAXPATHLEN]; +static char rwlock_str[MAXPATHLEN]; +static char sxlock_str[MAXPATHLEN]; +#endif + +/* + * Table of global identifiers. This is used to populate the global identifier + * hash when a new dtrace client open occurs. For more info see dt_ident.h. + * The global identifiers that represent functions use the dt_idops_func ops + * and specify the private data pointer as a prototype string which is parsed + * when the identifier is first encountered. These prototypes look like ANSI + * C function prototypes except that the special symbol "@" can be used as a + * wildcard to represent a single parameter of any type (i.e. any dt_node_t). + * The standard "..." notation can also be used to represent varargs. An empty + * parameter list is taken to mean void (that is, no arguments are permitted). + * A parameter enclosed in square brackets (e.g. "[int]") denotes an optional + * argument. 
+ */ +static const dt_ident_t _dtrace_globals[] = { +{ "alloca", DT_IDENT_FUNC, 0, DIF_SUBR_ALLOCA, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void *(size_t)" }, +{ "arg0", DT_IDENT_SCALAR, 0, DIF_VAR_ARG0, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg1", DT_IDENT_SCALAR, 0, DIF_VAR_ARG1, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg2", DT_IDENT_SCALAR, 0, DIF_VAR_ARG2, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg3", DT_IDENT_SCALAR, 0, DIF_VAR_ARG3, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg4", DT_IDENT_SCALAR, 0, DIF_VAR_ARG4, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg5", DT_IDENT_SCALAR, 0, DIF_VAR_ARG5, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg6", DT_IDENT_SCALAR, 0, DIF_VAR_ARG6, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg7", DT_IDENT_SCALAR, 0, DIF_VAR_ARG7, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg8", DT_IDENT_SCALAR, 0, DIF_VAR_ARG8, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "arg9", DT_IDENT_SCALAR, 0, DIF_VAR_ARG9, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +{ "args", DT_IDENT_ARRAY, 0, DIF_VAR_ARGS, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_args, NULL }, +{ "avg", DT_IDENT_AGGFUNC, 0, DTRACEAGG_AVG, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@)" }, +{ "basename", DT_IDENT_FUNC, 0, DIF_SUBR_BASENAME, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "string(const char *)" }, +{ "bcopy", DT_IDENT_FUNC, 0, DIF_SUBR_BCOPY, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(void *, void *, size_t)" }, +{ "breakpoint", DT_IDENT_ACTFUNC, 0, DT_ACT_BREAKPOINT, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void()" }, +{ "caller", DT_IDENT_SCALAR, 0, DIF_VAR_CALLER, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uintptr_t" }, +{ "chill", DT_IDENT_ACTFUNC, 0, DT_ACT_CHILL, DT_ATTR_STABCMN, DT_VERS_1_0, + 
&dt_idops_func, "void(int)" }, +{ "cleanpath", DT_IDENT_FUNC, 0, DIF_SUBR_CLEANPATH, DT_ATTR_STABCMN, + DT_VERS_1_0, &dt_idops_func, "string(const char *)" }, +{ "clear", DT_IDENT_ACTFUNC, 0, DT_ACT_CLEAR, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(...)" }, +{ "commit", DT_IDENT_ACTFUNC, 0, DT_ACT_COMMIT, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(int)" }, +{ "copyin", DT_IDENT_FUNC, 0, DIF_SUBR_COPYIN, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void *(uintptr_t, size_t)" }, +{ "copyinstr", DT_IDENT_FUNC, 0, DIF_SUBR_COPYINSTR, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "string(uintptr_t, [size_t])" }, +{ "copyinto", DT_IDENT_FUNC, 0, DIF_SUBR_COPYINTO, DT_ATTR_STABCMN, + DT_VERS_1_0, &dt_idops_func, "void(uintptr_t, size_t, void *)" }, +{ "copyout", DT_IDENT_FUNC, 0, DIF_SUBR_COPYOUT, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(void *, uintptr_t, size_t)" }, +{ "copyoutstr", DT_IDENT_FUNC, 0, DIF_SUBR_COPYOUTSTR, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(char *, uintptr_t, size_t)" }, +{ "count", DT_IDENT_AGGFUNC, 0, DTRACEAGG_COUNT, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void()" }, +{ "curthread", DT_IDENT_SCALAR, 0, DIF_VAR_CURTHREAD, + { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_PRIVATE, + DTRACE_CLASS_COMMON }, DT_VERS_1_0, +#if defined(sun) + &dt_idops_type, "genunix`kthread_t *" }, +#else + &dt_idops_type, curthread_str }, +#endif +{ "ddi_pathname", DT_IDENT_FUNC, 0, DIF_SUBR_DDI_PATHNAME, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "string(void *, int64_t)" }, +{ "denormalize", DT_IDENT_ACTFUNC, 0, DT_ACT_DENORMALIZE, DT_ATTR_STABCMN, + DT_VERS_1_0, &dt_idops_func, "void(...)" }, +{ "dirname", DT_IDENT_FUNC, 0, DIF_SUBR_DIRNAME, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "string(const char *)" }, +{ "discard", DT_IDENT_ACTFUNC, 0, DT_ACT_DISCARD, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(int)" }, +{ "epid", DT_IDENT_SCALAR, 0, DIF_VAR_EPID, DT_ATTR_STABCMN, 
DT_VERS_1_0, + &dt_idops_type, "uint_t" }, +{ "errno", DT_IDENT_SCALAR, 0, DIF_VAR_ERRNO, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int" }, +{ "execargs", DT_IDENT_SCALAR, 0, DIF_VAR_EXECARGS, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +{ "execname", DT_IDENT_SCALAR, 0, DIF_VAR_EXECNAME, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +{ "exit", DT_IDENT_ACTFUNC, 0, DT_ACT_EXIT, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(int)" }, +{ "freopen", DT_IDENT_ACTFUNC, 0, DT_ACT_FREOPEN, DT_ATTR_STABCMN, + DT_VERS_1_1, &dt_idops_func, "void(@, ...)" }, +{ "ftruncate", DT_IDENT_ACTFUNC, 0, DT_ACT_FTRUNCATE, DT_ATTR_STABCMN, + DT_VERS_1_0, &dt_idops_func, "void()" }, +{ "func", DT_IDENT_ACTFUNC, 0, DT_ACT_SYM, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_symaddr(uintptr_t)" }, +{ "getmajor", DT_IDENT_FUNC, 0, DIF_SUBR_GETMAJOR, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "genunix`major_t(genunix`dev_t)" }, +{ "getminor", DT_IDENT_FUNC, 0, DIF_SUBR_GETMINOR, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "genunix`minor_t(genunix`dev_t)" }, +{ "htonl", DT_IDENT_FUNC, 0, DIF_SUBR_HTONL, DT_ATTR_EVOLCMN, DT_VERS_1_3, + &dt_idops_func, "uint32_t(uint32_t)" }, +{ "htonll", DT_IDENT_FUNC, 0, DIF_SUBR_HTONLL, DT_ATTR_EVOLCMN, DT_VERS_1_3, + &dt_idops_func, "uint64_t(uint64_t)" }, +{ "htons", DT_IDENT_FUNC, 0, DIF_SUBR_HTONS, DT_ATTR_EVOLCMN, DT_VERS_1_3, + &dt_idops_func, "uint16_t(uint16_t)" }, +{ "getf", DT_IDENT_FUNC, 0, DIF_SUBR_GETF, DT_ATTR_STABCMN, DT_VERS_1_10, + &dt_idops_func, "file_t *(int)" }, +{ "gid", DT_IDENT_SCALAR, 0, DIF_VAR_GID, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "gid_t" }, +{ "id", DT_IDENT_SCALAR, 0, DIF_VAR_ID, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uint_t" }, +{ "index", DT_IDENT_FUNC, 0, DIF_SUBR_INDEX, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "int(const char *, const char *, [int])" }, +{ "inet_ntoa", DT_IDENT_FUNC, 0, DIF_SUBR_INET_NTOA, DT_ATTR_STABCMN, +#if 
defined(sun) + DT_VERS_1_5, &dt_idops_func, "string(ipaddr_t *)" }, +#else + DT_VERS_1_5, &dt_idops_func, "string(in_addr_t *)" }, +#endif +{ "inet_ntoa6", DT_IDENT_FUNC, 0, DIF_SUBR_INET_NTOA6, DT_ATTR_STABCMN, +#if defined(sun) + DT_VERS_1_5, &dt_idops_func, "string(in6_addr_t *)" }, +#else + DT_VERS_1_5, &dt_idops_func, "string(struct in6_addr *)" }, +#endif +{ "inet_ntop", DT_IDENT_FUNC, 0, DIF_SUBR_INET_NTOP, DT_ATTR_STABCMN, + DT_VERS_1_5, &dt_idops_func, "string(int, void *)" }, +{ "ipl", DT_IDENT_SCALAR, 0, DIF_VAR_IPL, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uint_t" }, +{ "json", DT_IDENT_FUNC, 0, DIF_SUBR_JSON, DT_ATTR_STABCMN, DT_VERS_1_11, + &dt_idops_func, "string(const char *, const char *)" }, +{ "jstack", DT_IDENT_ACTFUNC, 0, DT_ACT_JSTACK, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "stack(...)" }, +{ "lltostr", DT_IDENT_FUNC, 0, DIF_SUBR_LLTOSTR, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "string(int64_t, [int])" }, +{ "llquantize", DT_IDENT_AGGFUNC, 0, DTRACEAGG_LLQUANTIZE, DT_ATTR_STABCMN, + DT_VERS_1_7, &dt_idops_func, + "void(@, int32_t, int32_t, int32_t, int32_t, ...)" }, +{ "lquantize", DT_IDENT_AGGFUNC, 0, DTRACEAGG_LQUANTIZE, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@, int32_t, int32_t, ...)" }, +{ "max", DT_IDENT_AGGFUNC, 0, DTRACEAGG_MAX, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@)" }, +{ "memref", DT_IDENT_FUNC, 0, DIF_SUBR_MEMREF, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "uintptr_t *(void *, size_t)" }, +#if !defined(sun) +{ "memstr", DT_IDENT_FUNC, 0, DIF_SUBR_MEMSTR, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "string(void *, char, size_t)" }, +#endif +{ "min", DT_IDENT_AGGFUNC, 0, DTRACEAGG_MIN, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@)" }, +{ "mod", DT_IDENT_ACTFUNC, 0, DT_ACT_MOD, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_symaddr(uintptr_t)" }, +{ "msgdsize", DT_IDENT_FUNC, 0, DIF_SUBR_MSGDSIZE, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, 
"size_t(mblk_t *)" }, +{ "msgsize", DT_IDENT_FUNC, 0, DIF_SUBR_MSGSIZE, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "size_t(mblk_t *)" }, +#if defined(sun) +{ "mutex_owned", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_OWNED, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "int(genunix`kmutex_t *)" }, +{ "mutex_owner", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_OWNER, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "genunix`kthread_t *(genunix`kmutex_t *)" }, +{ "mutex_type_adaptive", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_TYPE_ADAPTIVE, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "int(genunix`kmutex_t *)" }, +{ "mutex_type_spin", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_TYPE_SPIN, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "int(genunix`kmutex_t *)" }, +#else +{ "mutex_owned", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_OWNED, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, intmtx_str }, +{ "mutex_owner", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_OWNER, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, threadmtx_str }, +{ "mutex_type_adaptive", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_TYPE_ADAPTIVE, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, intmtx_str }, +{ "mutex_type_spin", DT_IDENT_FUNC, 0, DIF_SUBR_MUTEX_TYPE_SPIN, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, intmtx_str }, +#endif +{ "ntohl", DT_IDENT_FUNC, 0, DIF_SUBR_NTOHL, DT_ATTR_EVOLCMN, DT_VERS_1_3, + &dt_idops_func, "uint32_t(uint32_t)" }, +{ "ntohll", DT_IDENT_FUNC, 0, DIF_SUBR_NTOHLL, DT_ATTR_EVOLCMN, DT_VERS_1_3, + &dt_idops_func, "uint64_t(uint64_t)" }, +{ "ntohs", DT_IDENT_FUNC, 0, DIF_SUBR_NTOHS, DT_ATTR_EVOLCMN, DT_VERS_1_3, + &dt_idops_func, "uint16_t(uint16_t)" }, +{ "normalize", DT_IDENT_ACTFUNC, 0, DT_ACT_NORMALIZE, DT_ATTR_STABCMN, + DT_VERS_1_0, &dt_idops_func, "void(...)" }, +{ "panic", DT_IDENT_ACTFUNC, 0, DT_ACT_PANIC, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void()" }, +{ "pid", DT_IDENT_SCALAR, 0, DIF_VAR_PID, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "pid_t" }, +{ "ppid", DT_IDENT_SCALAR, 0, 
DIF_VAR_PPID, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "pid_t" }, +{ "print", DT_IDENT_ACTFUNC, 0, DT_ACT_PRINT, DT_ATTR_STABCMN, DT_VERS_1_9, + &dt_idops_func, "void(@)" }, +{ "printa", DT_IDENT_ACTFUNC, 0, DT_ACT_PRINTA, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@, ...)" }, +{ "printf", DT_IDENT_ACTFUNC, 0, DT_ACT_PRINTF, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@, ...)" }, +{ "printm", DT_IDENT_ACTFUNC, 0, DT_ACT_PRINTM, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(size_t, uintptr_t *)" }, +{ "printt", DT_IDENT_ACTFUNC, 0, DT_ACT_PRINTT, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(size_t, uintptr_t *)" }, +{ "probefunc", DT_IDENT_SCALAR, 0, DIF_VAR_PROBEFUNC, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +{ "probemod", DT_IDENT_SCALAR, 0, DIF_VAR_PROBEMOD, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +{ "probename", DT_IDENT_SCALAR, 0, DIF_VAR_PROBENAME, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +{ "probeprov", DT_IDENT_SCALAR, 0, DIF_VAR_PROBEPROV, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +{ "progenyof", DT_IDENT_FUNC, 0, DIF_SUBR_PROGENYOF, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "int(pid_t)" }, +{ "quantize", DT_IDENT_AGGFUNC, 0, DTRACEAGG_QUANTIZE, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@, ...)" }, +{ "raise", DT_IDENT_ACTFUNC, 0, DT_ACT_RAISE, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(int)" }, +{ "rand", DT_IDENT_FUNC, 0, DIF_SUBR_RAND, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "int()" }, +{ "rindex", DT_IDENT_FUNC, 0, DIF_SUBR_RINDEX, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "int(const char *, const char *, [int])" }, +#if defined(sun) +{ "rw_iswriter", DT_IDENT_FUNC, 0, DIF_SUBR_RW_ISWRITER, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "int(genunix`krwlock_t *)" }, +{ "rw_read_held", DT_IDENT_FUNC, 0, DIF_SUBR_RW_READ_HELD, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, 
"int(genunix`krwlock_t *)" }, +{ "rw_write_held", DT_IDENT_FUNC, 0, DIF_SUBR_RW_WRITE_HELD, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, "int(genunix`krwlock_t *)" }, +#else +{ "rw_iswriter", DT_IDENT_FUNC, 0, DIF_SUBR_RW_ISWRITER, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, rwlock_str }, +{ "rw_read_held", DT_IDENT_FUNC, 0, DIF_SUBR_RW_READ_HELD, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, rwlock_str }, +{ "rw_write_held", DT_IDENT_FUNC, 0, DIF_SUBR_RW_WRITE_HELD, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, rwlock_str }, +#endif +{ "self", DT_IDENT_PTR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "void" }, +{ "setopt", DT_IDENT_ACTFUNC, 0, DT_ACT_SETOPT, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "void(const char *, [const char *])" }, +{ "speculate", DT_IDENT_ACTFUNC, 0, DT_ACT_SPECULATE, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(int)" }, +{ "speculation", DT_IDENT_FUNC, 0, DIF_SUBR_SPECULATION, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "int()" }, +{ "stack", DT_IDENT_ACTFUNC, 0, DT_ACT_STACK, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "stack(...)" }, +{ "stackdepth", DT_IDENT_SCALAR, 0, DIF_VAR_STACKDEPTH, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uint32_t" }, +{ "stddev", DT_IDENT_AGGFUNC, 0, DTRACEAGG_STDDEV, DT_ATTR_STABCMN, + DT_VERS_1_6, &dt_idops_func, "void(@)" }, +{ "stop", DT_IDENT_ACTFUNC, 0, DT_ACT_STOP, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void()" }, +{ "strchr", DT_IDENT_FUNC, 0, DIF_SUBR_STRCHR, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "string(const char *, char)" }, +{ "strlen", DT_IDENT_FUNC, 0, DIF_SUBR_STRLEN, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "size_t(const char *)" }, +{ "strjoin", DT_IDENT_FUNC, 0, DIF_SUBR_STRJOIN, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "string(const char *, const char *)" }, +{ "strrchr", DT_IDENT_FUNC, 0, DIF_SUBR_STRRCHR, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "string(const char *, char)" }, +{ 
"strstr", DT_IDENT_FUNC, 0, DIF_SUBR_STRSTR, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "string(const char *, const char *)" }, +{ "strtok", DT_IDENT_FUNC, 0, DIF_SUBR_STRTOK, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "string(const char *, const char *)" }, +{ "strtoll", DT_IDENT_FUNC, 0, DIF_SUBR_STRTOLL, DT_ATTR_STABCMN, DT_VERS_1_11, + &dt_idops_func, "int64_t(const char *, [int])" }, +{ "substr", DT_IDENT_FUNC, 0, DIF_SUBR_SUBSTR, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "string(const char *, int, [int])" }, +{ "sum", DT_IDENT_AGGFUNC, 0, DTRACEAGG_SUM, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@)" }, +#if !defined(sun) +{ "sx_isexclusive", DT_IDENT_FUNC, 0, DIF_SUBR_SX_ISEXCLUSIVE, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, sxlock_str }, +{ "sx_shared_held", DT_IDENT_FUNC, 0, DIF_SUBR_SX_SHARED_HELD, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, sxlock_str }, +{ "sx_exclusive_held", DT_IDENT_FUNC, 0, DIF_SUBR_SX_EXCLUSIVE_HELD, + DT_ATTR_EVOLCMN, DT_VERS_1_0, + &dt_idops_func, sxlock_str }, +#endif +{ "sym", DT_IDENT_ACTFUNC, 0, DT_ACT_SYM, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_symaddr(uintptr_t)" }, +{ "system", DT_IDENT_ACTFUNC, 0, DT_ACT_SYSTEM, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@, ...)" }, +{ "this", DT_IDENT_PTR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "void" }, +{ "tid", DT_IDENT_SCALAR, 0, DIF_VAR_TID, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "id_t" }, +{ "timestamp", DT_IDENT_SCALAR, 0, DIF_VAR_TIMESTAMP, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uint64_t" }, +{ "tolower", DT_IDENT_FUNC, 0, DIF_SUBR_TOLOWER, DT_ATTR_STABCMN, DT_VERS_1_8, + &dt_idops_func, "string(const char *)" }, +{ "toupper", DT_IDENT_FUNC, 0, DIF_SUBR_TOUPPER, DT_ATTR_STABCMN, DT_VERS_1_8, + &dt_idops_func, "string(const char *)" }, +{ "trace", DT_IDENT_ACTFUNC, 0, DT_ACT_TRACE, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@)" }, +{ "tracemem", DT_IDENT_ACTFUNC, 0, 
DT_ACT_TRACEMEM, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "void(@, size_t, ...)" }, +{ "trunc", DT_IDENT_ACTFUNC, 0, DT_ACT_TRUNC, DT_ATTR_STABCMN, + DT_VERS_1_0, &dt_idops_func, "void(...)" }, +{ "typeref", DT_IDENT_FUNC, 0, DIF_SUBR_TYPEREF, DT_ATTR_STABCMN, DT_VERS_1_1, + &dt_idops_func, "uintptr_t *(void *, size_t, string, size_t)" }, +{ "uaddr", DT_IDENT_ACTFUNC, 0, DT_ACT_UADDR, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_usymaddr(uintptr_t)" }, +{ "ucaller", DT_IDENT_SCALAR, 0, DIF_VAR_UCALLER, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_type, "uint64_t" }, +{ "ufunc", DT_IDENT_ACTFUNC, 0, DT_ACT_USYM, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_usymaddr(uintptr_t)" }, +{ "uid", DT_IDENT_SCALAR, 0, DIF_VAR_UID, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uid_t" }, +{ "umod", DT_IDENT_ACTFUNC, 0, DT_ACT_UMOD, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_usymaddr(uintptr_t)" }, +{ "uregs", DT_IDENT_ARRAY, 0, DIF_VAR_UREGS, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_regs, NULL }, +{ "ustack", DT_IDENT_ACTFUNC, 0, DT_ACT_USTACK, DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_func, "stack(...)" }, +{ "ustackdepth", DT_IDENT_SCALAR, 0, DIF_VAR_USTACKDEPTH, + DT_ATTR_STABCMN, DT_VERS_1_2, + &dt_idops_type, "uint32_t" }, +{ "usym", DT_IDENT_ACTFUNC, 0, DT_ACT_USYM, DT_ATTR_STABCMN, + DT_VERS_1_2, &dt_idops_func, "_usymaddr(uintptr_t)" }, +{ "vtimestamp", DT_IDENT_SCALAR, 0, DIF_VAR_VTIMESTAMP, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "uint64_t" }, +{ "walltimestamp", DT_IDENT_SCALAR, 0, DIF_VAR_WALLTIMESTAMP, + DT_ATTR_STABCMN, DT_VERS_1_0, + &dt_idops_type, "int64_t" }, +#if defined(sun) +{ "zonename", DT_IDENT_SCALAR, 0, DIF_VAR_ZONENAME, + DT_ATTR_STABCMN, DT_VERS_1_0, &dt_idops_type, "string" }, +#endif + +#if !defined(sun) +{ "cpu", DT_IDENT_SCALAR, 0, DIF_VAR_CPU, + DT_ATTR_STABCMN, DT_VERS_1_6_3, &dt_idops_type, "int" }, +#endif + +{ NULL, 0, 0, 0, { 0, 0, 0 }, 0, NULL, NULL } +}; + +/* + * Tables of ILP32 intrinsic integer and 
floating-point type templates to use + * to populate the dynamic "C" CTF type container. + */ +static const dt_intrinsic_t _dtrace_intrinsics_32[] = { +{ "void", { CTF_INT_SIGNED, 0, 0 }, CTF_K_INTEGER }, +{ "signed", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "unsigned", { 0, 0, 32 }, CTF_K_INTEGER }, +{ "char", { CTF_INT_SIGNED | CTF_INT_CHAR, 0, 8 }, CTF_K_INTEGER }, +{ "short", { CTF_INT_SIGNED, 0, 16 }, CTF_K_INTEGER }, +{ "int", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "long", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "long long", { CTF_INT_SIGNED, 0, 64 }, CTF_K_INTEGER }, +{ "signed char", { CTF_INT_SIGNED | CTF_INT_CHAR, 0, 8 }, CTF_K_INTEGER }, +{ "signed short", { CTF_INT_SIGNED, 0, 16 }, CTF_K_INTEGER }, +{ "signed int", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "signed long", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "signed long long", { CTF_INT_SIGNED, 0, 64 }, CTF_K_INTEGER }, +{ "unsigned char", { CTF_INT_CHAR, 0, 8 }, CTF_K_INTEGER }, +{ "unsigned short", { 0, 0, 16 }, CTF_K_INTEGER }, +{ "unsigned int", { 0, 0, 32 }, CTF_K_INTEGER }, +{ "unsigned long", { 0, 0, 32 }, CTF_K_INTEGER }, +{ "unsigned long long", { 0, 0, 64 }, CTF_K_INTEGER }, +{ "_Bool", { CTF_INT_BOOL, 0, 8 }, CTF_K_INTEGER }, +{ "float", { CTF_FP_SINGLE, 0, 32 }, CTF_K_FLOAT }, +{ "double", { CTF_FP_DOUBLE, 0, 64 }, CTF_K_FLOAT }, +{ "long double", { CTF_FP_LDOUBLE, 0, 128 }, CTF_K_FLOAT }, +{ "float imaginary", { CTF_FP_IMAGRY, 0, 32 }, CTF_K_FLOAT }, +{ "double imaginary", { CTF_FP_DIMAGRY, 0, 64 }, CTF_K_FLOAT }, +{ "long double imaginary", { CTF_FP_LDIMAGRY, 0, 128 }, CTF_K_FLOAT }, +{ "float complex", { CTF_FP_CPLX, 0, 64 }, CTF_K_FLOAT }, +{ "double complex", { CTF_FP_DCPLX, 0, 128 }, CTF_K_FLOAT }, +{ "long double complex", { CTF_FP_LDCPLX, 0, 256 }, CTF_K_FLOAT }, +{ NULL, { 0, 0, 0 }, 0 } +}; + +/* + * Tables of LP64 intrinsic integer and floating-point type templates to use + * to populate the dynamic "C" CTF type container. 
+ */ +static const dt_intrinsic_t _dtrace_intrinsics_64[] = { +{ "void", { CTF_INT_SIGNED, 0, 0 }, CTF_K_INTEGER }, +{ "signed", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "unsigned", { 0, 0, 32 }, CTF_K_INTEGER }, +{ "char", { CTF_INT_SIGNED | CTF_INT_CHAR, 0, 8 }, CTF_K_INTEGER }, +{ "short", { CTF_INT_SIGNED, 0, 16 }, CTF_K_INTEGER }, +{ "int", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "long", { CTF_INT_SIGNED, 0, 64 }, CTF_K_INTEGER }, +{ "long long", { CTF_INT_SIGNED, 0, 64 }, CTF_K_INTEGER }, +{ "signed char", { CTF_INT_SIGNED | CTF_INT_CHAR, 0, 8 }, CTF_K_INTEGER }, +{ "signed short", { CTF_INT_SIGNED, 0, 16 }, CTF_K_INTEGER }, +{ "signed int", { CTF_INT_SIGNED, 0, 32 }, CTF_K_INTEGER }, +{ "signed long", { CTF_INT_SIGNED, 0, 64 }, CTF_K_INTEGER }, +{ "signed long long", { CTF_INT_SIGNED, 0, 64 }, CTF_K_INTEGER }, +{ "unsigned char", { CTF_INT_CHAR, 0, 8 }, CTF_K_INTEGER }, +{ "unsigned short", { 0, 0, 16 }, CTF_K_INTEGER }, +{ "unsigned int", { 0, 0, 32 }, CTF_K_INTEGER }, +{ "unsigned long", { 0, 0, 64 }, CTF_K_INTEGER }, +{ "unsigned long long", { 0, 0, 64 }, CTF_K_INTEGER }, +{ "_Bool", { CTF_INT_BOOL, 0, 8 }, CTF_K_INTEGER }, +{ "float", { CTF_FP_SINGLE, 0, 32 }, CTF_K_FLOAT }, +{ "double", { CTF_FP_DOUBLE, 0, 64 }, CTF_K_FLOAT }, +{ "long double", { CTF_FP_LDOUBLE, 0, 128 }, CTF_K_FLOAT }, +{ "float imaginary", { CTF_FP_IMAGRY, 0, 32 }, CTF_K_FLOAT }, +{ "double imaginary", { CTF_FP_DIMAGRY, 0, 64 }, CTF_K_FLOAT }, +{ "long double imaginary", { CTF_FP_LDIMAGRY, 0, 128 }, CTF_K_FLOAT }, +{ "float complex", { CTF_FP_CPLX, 0, 64 }, CTF_K_FLOAT }, +{ "double complex", { CTF_FP_DCPLX, 0, 128 }, CTF_K_FLOAT }, +{ "long double complex", { CTF_FP_LDCPLX, 0, 256 }, CTF_K_FLOAT }, +{ NULL, { 0, 0, 0 }, 0 } +}; + +/* + * Tables of ILP32 typedefs to use to populate the dynamic "D" CTF container. + * These aliases ensure that D definitions can use typical <sys/types.h> names. 
+ */ +static const dt_typedef_t _dtrace_typedefs_32[] = { +{ "char", "int8_t" }, +{ "short", "int16_t" }, +{ "int", "int32_t" }, +{ "long long", "int64_t" }, +{ "int", "intptr_t" }, +{ "int", "ssize_t" }, +{ "unsigned char", "uint8_t" }, +{ "unsigned short", "uint16_t" }, +{ "unsigned", "uint32_t" }, +{ "unsigned long long", "uint64_t" }, +{ "unsigned char", "uchar_t" }, +{ "unsigned short", "ushort_t" }, +{ "unsigned", "uint_t" }, +{ "unsigned long", "ulong_t" }, +{ "unsigned long long", "u_longlong_t" }, +{ "int", "ptrdiff_t" }, +{ "unsigned", "uintptr_t" }, +{ "unsigned", "size_t" }, +{ "long", "id_t" }, +{ "long", "pid_t" }, +{ NULL, NULL } +}; + +/* + * Tables of LP64 typedefs to use to populate the dynamic "D" CTF container. + * These aliases ensure that D definitions can use typical <sys/types.h> names. + */ +static const dt_typedef_t _dtrace_typedefs_64[] = { +{ "char", "int8_t" }, +{ "short", "int16_t" }, +{ "int", "int32_t" }, +{ "long", "int64_t" }, +{ "long", "intptr_t" }, +{ "long", "ssize_t" }, +{ "unsigned char", "uint8_t" }, +{ "unsigned short", "uint16_t" }, +{ "unsigned", "uint32_t" }, +{ "unsigned long", "uint64_t" }, +{ "unsigned char", "uchar_t" }, +{ "unsigned short", "ushort_t" }, +{ "unsigned", "uint_t" }, +{ "unsigned long", "ulong_t" }, +{ "unsigned long long", "u_longlong_t" }, +{ "long", "ptrdiff_t" }, +{ "unsigned long", "uintptr_t" }, +{ "unsigned long", "size_t" }, +{ "int", "id_t" }, +{ "int", "pid_t" }, +{ NULL, NULL } +}; + +/* + * Tables of ILP32 integer type templates used to populate the dtp->dt_ints[] + * cache when a new dtrace client open occurs. Values are set by dtrace_open(). 
+ */ +static const dt_intdesc_t _dtrace_ints_32[] = { +{ "int", NULL, CTF_ERR, 0x7fffffffULL }, +{ "unsigned int", NULL, CTF_ERR, 0xffffffffULL }, +{ "long", NULL, CTF_ERR, 0x7fffffffULL }, +{ "unsigned long", NULL, CTF_ERR, 0xffffffffULL }, +{ "long long", NULL, CTF_ERR, 0x7fffffffffffffffULL }, +{ "unsigned long long", NULL, CTF_ERR, 0xffffffffffffffffULL } +}; + +/* + * Tables of LP64 integer type templates used to populate the dtp->dt_ints[] + * cache when a new dtrace client open occurs. Values are set by dtrace_open(). + */ +static const dt_intdesc_t _dtrace_ints_64[] = { +{ "int", NULL, CTF_ERR, 0x7fffffffULL }, +{ "unsigned int", NULL, CTF_ERR, 0xffffffffULL }, +{ "long", NULL, CTF_ERR, 0x7fffffffffffffffULL }, +{ "unsigned long", NULL, CTF_ERR, 0xffffffffffffffffULL }, +{ "long long", NULL, CTF_ERR, 0x7fffffffffffffffULL }, +{ "unsigned long long", NULL, CTF_ERR, 0xffffffffffffffffULL } +}; + +/* + * Table of macro variable templates used to populate the macro identifier hash + * when a new dtrace client open occurs. Values are set by dtrace_update(). 
+ */ +static const dt_ident_t _dtrace_macros[] = { +{ "egid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "euid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "gid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "pid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "pgid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "ppid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "projid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "sid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "taskid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "target", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ "uid", DT_IDENT_SCALAR, 0, 0, DT_ATTR_STABCMN, DT_VERS_1_0 }, +{ NULL, 0, 0, 0, { 0, 0, 0 }, 0 } +}; + +/* + * Hard-wired definition string to be compiled and cached every time a new + * DTrace library handle is initialized. This string should only be used to + * contain definitions that should be present regardless of DTRACE_O_NOLIBS. + */ +static const char _dtrace_hardwire[] = "\ +inline long NULL = 0; \n\ +#pragma D binding \"1.0\" NULL\n\ +"; + +/* + * Default DTrace configuration to use when opening libdtrace DTRACE_O_NODEV. + * If DTRACE_O_NODEV is not set, we load the configuration from the kernel. + * The use of CTF_MODEL_NATIVE is more subtle than it might appear: we are + * relying on the fact that when running dtrace(1M), isaexec will invoke the + * binary with the same bitness as the kernel, which is what we want by default + * when generating our DIF. The user can override the choice using oflags. 
+ */ +static const dtrace_conf_t _dtrace_conf = { + DIF_VERSION, /* dtc_difversion */ + DIF_DIR_NREGS, /* dtc_difintregs */ + DIF_DTR_NREGS, /* dtc_diftupregs */ + CTF_MODEL_NATIVE /* dtc_ctfmodel */ +}; + +const dtrace_attribute_t _dtrace_maxattr = { + DTRACE_STABILITY_MAX, + DTRACE_STABILITY_MAX, + DTRACE_CLASS_MAX +}; + +const dtrace_attribute_t _dtrace_defattr = { + DTRACE_STABILITY_STABLE, + DTRACE_STABILITY_STABLE, + DTRACE_CLASS_COMMON +}; + +const dtrace_attribute_t _dtrace_symattr = { + DTRACE_STABILITY_PRIVATE, + DTRACE_STABILITY_PRIVATE, + DTRACE_CLASS_UNKNOWN +}; + +const dtrace_attribute_t _dtrace_typattr = { + DTRACE_STABILITY_PRIVATE, + DTRACE_STABILITY_PRIVATE, + DTRACE_CLASS_UNKNOWN +}; + +const dtrace_attribute_t _dtrace_prvattr = { + DTRACE_STABILITY_PRIVATE, + DTRACE_STABILITY_PRIVATE, + DTRACE_CLASS_UNKNOWN +}; + +const dtrace_pattr_t _dtrace_prvdesc = { +{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON }, +}; + +#if defined(sun) +const char *_dtrace_defcpp = "/usr/ccs/lib/cpp"; /* default cpp(1) to invoke */ +const char *_dtrace_defld = "/usr/ccs/bin/ld"; /* default ld(1) to invoke */ +#else +const char *_dtrace_defcpp = "cpp"; /* default cpp(1) to invoke */ +const char *_dtrace_defld = "ld"; /* default ld(1) to invoke */ +#endif + +const char *_dtrace_libdir = "/usr/lib/dtrace"; /* default library directory */ +#if defined(sun) +const char *_dtrace_provdir = "/dev/dtrace/provider"; /* provider directory */ +#else +const char *_dtrace_provdir = "/dev/dtrace"; /* provider directory */ +#endif + +int _dtrace_strbuckets = 211; /* default number of hash buckets (prime) */ +int _dtrace_intbuckets = 256; /* 
default number of integer buckets (Pof2) */ +uint_t _dtrace_strsize = 256; /* default size of string intrinsic type */ +uint_t _dtrace_stkindent = 14; /* default whitespace indent for stack/ustack */ +uint_t _dtrace_pidbuckets = 64; /* default number of pid hash buckets */ +uint_t _dtrace_pidlrulim = 8; /* default number of pid handles to cache */ +size_t _dtrace_bufsize = 512; /* default dt_buf_create() size */ +int _dtrace_argmax = 32; /* default maximum number of probe arguments */ + +int _dtrace_debug = 0; /* debug messages enabled (off) */ +const char *const _dtrace_version = DT_VERS_STRING; /* API version string */ +int _dtrace_rdvers = RD_VERSION; /* rtld_db feature version */ + +typedef struct dt_fdlist { + int *df_fds; /* array of provider driver file descriptors */ + uint_t df_ents; /* number of valid elements in df_fds[] */ + uint_t df_size; /* size of df_fds[] */ +} dt_fdlist_t; + +#if defined(sun) +#pragma init(_dtrace_init) +#else +void _dtrace_init(void) __attribute__ ((constructor)); +#endif +void +_dtrace_init(void) +{ + _dtrace_debug = getenv("DTRACE_DEBUG") != NULL; + + for (; _dtrace_rdvers > 0; _dtrace_rdvers--) { + if (rd_init(_dtrace_rdvers) == RD_OK) + break; + } +#if defined(__i386__) + /* make long doubles 64 bits -sson */ + (void) fpsetprec(FP_PE); +#endif +} + +static dtrace_hdl_t * +set_open_errno(dtrace_hdl_t *dtp, int *errp, int err) +{ + if (dtp != NULL) + dtrace_close(dtp); + if (errp != NULL) + *errp = err; + return (NULL); +} + +static void +dt_provmod_open(dt_provmod_t **provmod, dt_fdlist_t *dfp) +{ + dt_provmod_t *prov; + char path[PATH_MAX]; + int fd; +#if defined(sun) + struct dirent *dp, *ep; + DIR *dirp; + + if ((dirp = opendir(_dtrace_provdir)) == NULL) + return; /* failed to open directory; just skip it */ + + ep = alloca(sizeof (struct dirent) + PATH_MAX + 1); + bzero(ep, sizeof (struct dirent) + PATH_MAX + 1); + + while (readdir_r(dirp, ep, &dp) == 0 && dp != NULL) { + if (dp->d_name[0] == '.') + continue; /* skip "." 
and ".." */ + + if (dfp->df_ents == dfp->df_size) { + uint_t size = dfp->df_size ? dfp->df_size * 2 : 16; + int *fds = realloc(dfp->df_fds, size * sizeof (int)); + + if (fds == NULL) + break; /* skip the rest of this directory */ + + dfp->df_fds = fds; + dfp->df_size = size; + } + + (void) snprintf(path, sizeof (path), "%s/%s", + _dtrace_provdir, dp->d_name); + + if ((fd = open(path, O_RDONLY)) == -1) + continue; /* failed to open driver; just skip it */ + + if (((prov = malloc(sizeof (dt_provmod_t))) == NULL) || + (prov->dp_name = malloc(strlen(dp->d_name) + 1)) == NULL) { + free(prov); + (void) close(fd); + break; + } + + (void) strcpy(prov->dp_name, dp->d_name); + prov->dp_next = *provmod; + *provmod = prov; + + dt_dprintf("opened provider %s\n", dp->d_name); + dfp->df_fds[dfp->df_ents++] = fd; + } + + (void) closedir(dirp); +#else + char *p; + char *p1; + char *p_providers = NULL; + int error; + size_t len = 0; + + /* + * Loop to allocate/reallocate memory for the string of provider + * names and retry: + */ + while(1) { + /* + * The first time around, get the string length. The next time, + * hopefully we've allocated enough memory. + */ + error = sysctlbyname("debug.dtrace.providers",p_providers,&len,NULL,0); + if (len == 0) + /* No providers? That's strange. Where's dtrace? */ + break; + else if (error == 0 && p_providers == NULL) { + /* + * Allocate the initial memory which should be enough + * unless another provider loads before we have + * time to go back and get the string. + */ + if ((p_providers = malloc(len)) == NULL) + /* How do we report errors here? */ + return; + } else if (error == -1 && errno == ENOMEM) { + /* + * The current buffer isn't large enough, so + * reallocate it. We normally won't need to do this + * because providers aren't being loaded all the time. + */ + if ((p = realloc(p_providers,len)) == NULL) + /* How do we report errors here? 
*/ + return; + p_providers = p; + } else + break; + } + + /* Check if we got a string of provider names: */ + if (error == 0 && len > 0 && p_providers != NULL) { + p = p_providers; + + /* + * Parse the string containing the space separated + * provider names. + */ + while ((p1 = strsep(&p," ")) != NULL) { + if (dfp->df_ents == dfp->df_size) { + uint_t size = dfp->df_size ? dfp->df_size * 2 : 16; + int *fds = realloc(dfp->df_fds, size * sizeof (int)); + + if (fds == NULL) + break; + + dfp->df_fds = fds; + dfp->df_size = size; + } + + (void) snprintf(path, sizeof (path), "/dev/dtrace/%s", p1); + + if ((fd = open(path, O_RDONLY)) == -1) + continue; /* failed to open driver; just skip it */ + + if (((prov = malloc(sizeof (dt_provmod_t))) == NULL) || + (prov->dp_name = malloc(strlen(p1) + 1)) == NULL) { + free(prov); + (void) close(fd); + break; + } + + (void) strcpy(prov->dp_name, p1); + prov->dp_next = *provmod; + *provmod = prov; + + dt_dprintf("opened provider %s\n", p1); + dfp->df_fds[dfp->df_ents++] = fd; + } + } + if (p_providers != NULL) + free(p_providers); +#endif +} + +static void +dt_provmod_destroy(dt_provmod_t **provmod) +{ + dt_provmod_t *next, *current; + + for (current = *provmod; current != NULL; current = next) { + next = current->dp_next; + free(current->dp_name); + free(current); + } + + *provmod = NULL; +} + +#if defined(sun) +static const char * +dt_get_sysinfo(int cmd, char *buf, size_t len) +{ + ssize_t rv = sysinfo(cmd, buf, len); + char *p = buf; + + if (rv < 0 || rv > len) + (void) snprintf(buf, len, "%s", "Unknown"); + + while ((p = strchr(p, '.')) != NULL) + *p++ = '_'; + + return (buf); +} +#endif + +static dtrace_hdl_t * +dt_vopen(int version, int flags, int *errp, + const dtrace_vector_t *vector, void *arg) +{ + dtrace_hdl_t *dtp = NULL; + int dtfd = -1, ftfd = -1, fterr = 0; + dtrace_prog_t *pgp; + dt_module_t *dmp; + dt_provmod_t *provmod = NULL; + int i, err; + struct rlimit rl; + + const dt_intrinsic_t *dinp; + const dt_typedef_t 
*dtyp; + const dt_ident_t *idp; + + dtrace_typeinfo_t dtt; + ctf_funcinfo_t ctc; + ctf_arinfo_t ctr; + + dt_fdlist_t df = { NULL, 0, 0 }; + + char isadef[32], utsdef[32]; + char s1[64], s2[64]; + + if (version <= 0) + return (set_open_errno(dtp, errp, EINVAL)); + + if (version > DTRACE_VERSION) + return (set_open_errno(dtp, errp, EDT_VERSION)); + + if (version < DTRACE_VERSION) { + /* + * Currently, increasing the library version number is used to + * denote a binary incompatible change. That is, a consumer + * of the library cannot run on a version of the library with + * a higher DTRACE_VERSION number than the consumer compiled + * against. Once the library API has been committed to, + * backwards binary compatibility will be required; at that + * time, this check should change to return EDT_OVERSION only + * if the specified version number is less than the version + * number at the time of interface commitment. + */ + return (set_open_errno(dtp, errp, EDT_OVERSION)); + } + + if (flags & ~DTRACE_O_MASK) + return (set_open_errno(dtp, errp, EINVAL)); + + if ((flags & DTRACE_O_LP64) && (flags & DTRACE_O_ILP32)) + return (set_open_errno(dtp, errp, EINVAL)); + + if (vector == NULL && arg != NULL) + return (set_open_errno(dtp, errp, EINVAL)); + + if (elf_version(EV_CURRENT) == EV_NONE) + return (set_open_errno(dtp, errp, EDT_ELFVERSION)); + + if (vector != NULL || (flags & DTRACE_O_NODEV)) + goto alloc; /* do not attempt to open dtrace device */ + + /* + * Before we get going, crank our limit on file descriptors up to the + * hard limit. This is to allow for the fact that libproc keeps file + * descriptors to objects open for the lifetime of the proc handle; + * without raising our hard limit, we would have an acceptably small + * bound on the number of processes that we could concurrently + * instrument with the pid provider. 
+ */ + if (getrlimit(RLIMIT_NOFILE, &rl) == 0) { + rl.rlim_cur = rl.rlim_max; + (void) setrlimit(RLIMIT_NOFILE, &rl); + } + + /* + * Get the device path of each of the providers. We hold them open + * in the df.df_fds list until we open the DTrace driver itself, + * allowing us to see all of the probes provided on this system. Once + * we have the DTrace driver open, we can safely close all the providers + * now that they have registered with the framework. + */ + dt_provmod_open(&provmod, &df); + + dtfd = open("/dev/dtrace/dtrace", O_RDWR); + err = errno; /* save errno from opening dtfd */ +#if defined(__FreeBSD__) + /* + * Automatically load the 'dtraceall' module if we couldn't open the + * char device. + */ + if (err == ENOENT && modfind("dtraceall") < 0) { + kldload("dtraceall"); /* ignore the error */ + dtfd = open("/dev/dtrace/dtrace", O_RDWR); + err = errno; + } +#endif +#if defined(sun) + ftfd = open("/dev/dtrace/provider/fasttrap", O_RDWR); +#else + ftfd = open("/dev/dtrace/fasttrap", O_RDWR); +#endif + fterr = ftfd == -1 ? errno : 0; /* save errno from open ftfd */ + + while (df.df_ents-- != 0) + (void) close(df.df_fds[df.df_ents]); + + free(df.df_fds); + + /* + * If we failed to open the dtrace device, fail dtrace_open(). + * We convert some kernel errnos to custom libdtrace errnos to + * improve the resulting message from the usual strerror(). 
+ */ + if (dtfd == -1) { + dt_provmod_destroy(&provmod); + switch (err) { + case ENOENT: + err = EDT_NOENT; + break; + case EBUSY: + err = EDT_BUSY; + break; + case EACCES: + err = EDT_ACCESS; + break; + } + return (set_open_errno(dtp, errp, err)); + } + + (void) fcntl(dtfd, F_SETFD, FD_CLOEXEC); + (void) fcntl(ftfd, F_SETFD, FD_CLOEXEC); + +alloc: + if ((dtp = malloc(sizeof (dtrace_hdl_t))) == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + + bzero(dtp, sizeof (dtrace_hdl_t)); + dtp->dt_oflags = flags; +#if defined(sun) + dtp->dt_prcmode = DT_PROC_STOP_PREINIT; +#else + dtp->dt_prcmode = DT_PROC_STOP_POSTINIT; +#endif + dtp->dt_linkmode = DT_LINK_KERNEL; + dtp->dt_linktype = DT_LTYP_ELF; + dtp->dt_xlatemode = DT_XL_STATIC; + dtp->dt_stdcmode = DT_STDC_XA; + dtp->dt_encoding = DT_ENCODING_UNSET; + dtp->dt_version = version; + dtp->dt_fd = dtfd; + dtp->dt_ftfd = ftfd; + dtp->dt_fterr = fterr; + dtp->dt_cdefs_fd = -1; + dtp->dt_ddefs_fd = -1; +#if defined(sun) + dtp->dt_stdout_fd = -1; +#else + dtp->dt_freopen_fp = NULL; +#endif + dtp->dt_modbuckets = _dtrace_strbuckets; + dtp->dt_mods = calloc(dtp->dt_modbuckets, sizeof (dt_module_t *)); + dtp->dt_provbuckets = _dtrace_strbuckets; + dtp->dt_provs = calloc(dtp->dt_provbuckets, sizeof (dt_provider_t *)); + dt_proc_hash_create(dtp); + dtp->dt_vmax = DT_VERS_LATEST; + dtp->dt_cpp_path = strdup(_dtrace_defcpp); + dtp->dt_cpp_argv = malloc(sizeof (char *)); + dtp->dt_cpp_argc = 1; + dtp->dt_cpp_args = 1; + dtp->dt_ld_path = strdup(_dtrace_defld); + dtp->dt_provmod = provmod; + dtp->dt_vector = vector; + dtp->dt_varg = arg; + dt_dof_init(dtp); + (void) uname(&dtp->dt_uts); + + if (dtp->dt_mods == NULL || dtp->dt_provs == NULL || + dtp->dt_procs == NULL || dtp->dt_ld_path == NULL || + dtp->dt_cpp_path == NULL || dtp->dt_cpp_argv == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + + for (i = 0; i < DTRACEOPT_MAX; i++) + dtp->dt_options[i] = DTRACEOPT_UNSET; + + dtp->dt_cpp_argv[0] = (char 
*)strbasename(dtp->dt_cpp_path); + +#if defined(sun) + (void) snprintf(isadef, sizeof (isadef), "-D__SUNW_D_%u", + (uint_t)(sizeof (void *) * NBBY)); + + (void) snprintf(utsdef, sizeof (utsdef), "-D__%s_%s", + dt_get_sysinfo(SI_SYSNAME, s1, sizeof (s1)), + dt_get_sysinfo(SI_RELEASE, s2, sizeof (s2))); + + if (dt_cpp_add_arg(dtp, "-D__sun") == NULL || + dt_cpp_add_arg(dtp, "-D__unix") == NULL || + dt_cpp_add_arg(dtp, "-D__SVR4") == NULL || + dt_cpp_add_arg(dtp, "-D__SUNW_D=1") == NULL || + dt_cpp_add_arg(dtp, isadef) == NULL || + dt_cpp_add_arg(dtp, utsdef) == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); +#endif + + if (flags & DTRACE_O_NODEV) + bcopy(&_dtrace_conf, &dtp->dt_conf, sizeof (_dtrace_conf)); + else if (dt_ioctl(dtp, DTRACEIOC_CONF, &dtp->dt_conf) != 0) + return (set_open_errno(dtp, errp, errno)); + + if (flags & DTRACE_O_LP64) + dtp->dt_conf.dtc_ctfmodel = CTF_MODEL_LP64; + else if (flags & DTRACE_O_ILP32) + dtp->dt_conf.dtc_ctfmodel = CTF_MODEL_ILP32; + +#ifdef __sparc + /* + * On SPARC systems, __sparc is always defined for <sys/isa_defs.h> + * and __sparcv9 is defined if we are doing a 64-bit compile. + */ + if (dt_cpp_add_arg(dtp, "-D__sparc") == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64 && + dt_cpp_add_arg(dtp, "-D__sparcv9") == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); +#endif + +#if defined(sun) +#ifdef __x86 + /* + * On x86 systems, __i386 is defined for <sys/isa_defs.h> for 32-bit + * compiles and __amd64 is defined for 64-bit compiles. Unlike SPARC, + * they are defined exclusive of one another (see PSARC 2004/619). 
+ */ + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) { + if (dt_cpp_add_arg(dtp, "-D__amd64") == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + } else { + if (dt_cpp_add_arg(dtp, "-D__i386") == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + } +#endif +#else +#if defined(__amd64__) || defined(__i386__) + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) { + if (dt_cpp_add_arg(dtp, "-m64") == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + } else { + if (dt_cpp_add_arg(dtp, "-m32") == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + } +#endif +#endif + + if (dtp->dt_conf.dtc_difversion < DIF_VERSION) + return (set_open_errno(dtp, errp, EDT_DIFVERS)); + + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_ILP32) + bcopy(_dtrace_ints_32, dtp->dt_ints, sizeof (_dtrace_ints_32)); + else + bcopy(_dtrace_ints_64, dtp->dt_ints, sizeof (_dtrace_ints_64)); + + /* + * On FreeBSD the kernel module name can't be hard-coded. The + * 'kern.bootfile' sysctl value tells us exactly which file is being + * used as the kernel. + */ +#if !defined(sun) + { + char bootfile[MAXPATHLEN]; + char *p; + int i; + size_t len = sizeof(bootfile); + + /* This call shouldn't fail, but use a default just in case. */ + if (sysctlbyname("kern.bootfile", bootfile, &len, NULL, 0) != 0) + strlcpy(bootfile, "kernel", sizeof(bootfile)); + + if ((p = strrchr(bootfile, '/')) != NULL) + p++; + else + p = bootfile; + + /* + * Format the global variables based on the kernel module name. 
+ */ + snprintf(curthread_str, sizeof(curthread_str), "%s`struct thread *",p); + snprintf(intmtx_str, sizeof(intmtx_str), "int(%s`struct mtx *)",p); + snprintf(threadmtx_str, sizeof(threadmtx_str), "struct thread *(%s`struct mtx *)",p); + snprintf(rwlock_str, sizeof(rwlock_str), "int(%s`struct rwlock *)",p); + snprintf(sxlock_str, sizeof(sxlock_str), "int(%s`struct sxlock *)",p); + } +#endif + + dtp->dt_macros = dt_idhash_create("macro", NULL, 0, UINT_MAX); + dtp->dt_aggs = dt_idhash_create("aggregation", NULL, + DTRACE_AGGVARIDNONE + 1, UINT_MAX); + + dtp->dt_globals = dt_idhash_create("global", _dtrace_globals, + DIF_VAR_OTHER_UBASE, DIF_VAR_OTHER_MAX); + + dtp->dt_tls = dt_idhash_create("thread local", NULL, + DIF_VAR_OTHER_UBASE, DIF_VAR_OTHER_MAX); + + if (dtp->dt_macros == NULL || dtp->dt_aggs == NULL || + dtp->dt_globals == NULL || dtp->dt_tls == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + + /* + * Populate the dt_macros identifier hash table by hand: we can't use + * the dt_idhash_populate() mechanism because we're not yet compiling + * and dtrace_update() needs to immediately reference these idents. + */ + for (idp = _dtrace_macros; idp->di_name != NULL; idp++) { + if (dt_idhash_insert(dtp->dt_macros, idp->di_name, + idp->di_kind, idp->di_flags, idp->di_id, idp->di_attr, + idp->di_vers, idp->di_ops ? idp->di_ops : &dt_idops_thaw, + idp->di_iarg, 0) == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + } + + /* + * Update the module list using /system/object and load the values for + * the macro variable definitions according to the current process. + */ + dtrace_update(dtp); + + /* + * Select the intrinsics and typedefs we want based on the data model. + * The intrinsics are under "C". The typedefs are added under "D". 
+ */ + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_ILP32) { + dinp = _dtrace_intrinsics_32; + dtyp = _dtrace_typedefs_32; + } else { + dinp = _dtrace_intrinsics_64; + dtyp = _dtrace_typedefs_64; + } + + /* + * Create a dynamic CTF container under the "C" scope for intrinsic + * types and types defined in ANSI-C header files that are included. + */ + if ((dmp = dtp->dt_cdefs = dt_module_create(dtp, "C")) == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + + if ((dmp->dm_ctfp = ctf_create(&dtp->dt_ctferr)) == NULL) + return (set_open_errno(dtp, errp, EDT_CTF)); + + dt_dprintf("created CTF container for %s (%p)\n", + dmp->dm_name, (void *)dmp->dm_ctfp); + + (void) ctf_setmodel(dmp->dm_ctfp, dtp->dt_conf.dtc_ctfmodel); + ctf_setspecific(dmp->dm_ctfp, dmp); + + dmp->dm_flags = DT_DM_LOADED; /* fake up loaded bit */ + dmp->dm_modid = -1; /* no module ID */ + + /* + * Fill the dynamic "C" CTF container with all of the intrinsic + * integer and floating-point types appropriate for this data model. + */ + for (; dinp->din_name != NULL; dinp++) { + if (dinp->din_kind == CTF_K_INTEGER) { + err = ctf_add_integer(dmp->dm_ctfp, CTF_ADD_ROOT, + dinp->din_name, &dinp->din_data); + } else { + err = ctf_add_float(dmp->dm_ctfp, CTF_ADD_ROOT, + dinp->din_name, &dinp->din_data); + } + + if (err == CTF_ERR) { + dt_dprintf("failed to add %s to C container: %s\n", + dinp->din_name, ctf_errmsg( + ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + } + + if (ctf_update(dmp->dm_ctfp) != 0) { + dt_dprintf("failed to update C container: %s\n", + ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + + /* + * Add intrinsic pointer types that are needed to initialize printf + * format dictionary types (see table in dt_printf.c). 
+ */ + (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, + ctf_lookup_by_name(dmp->dm_ctfp, "void")); + + (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, + ctf_lookup_by_name(dmp->dm_ctfp, "char")); + + (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, + ctf_lookup_by_name(dmp->dm_ctfp, "int")); + + if (ctf_update(dmp->dm_ctfp) != 0) { + dt_dprintf("failed to update C container: %s\n", + ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + + /* + * Create a dynamic CTF container under the "D" scope for types that + * are defined by the D program itself or on-the-fly by the D compiler. + * The "D" CTF container is a child of the "C" CTF container. + */ + if ((dmp = dtp->dt_ddefs = dt_module_create(dtp, "D")) == NULL) + return (set_open_errno(dtp, errp, EDT_NOMEM)); + + if ((dmp->dm_ctfp = ctf_create(&dtp->dt_ctferr)) == NULL) + return (set_open_errno(dtp, errp, EDT_CTF)); + + dt_dprintf("created CTF container for %s (%p)\n", + dmp->dm_name, (void *)dmp->dm_ctfp); + + (void) ctf_setmodel(dmp->dm_ctfp, dtp->dt_conf.dtc_ctfmodel); + ctf_setspecific(dmp->dm_ctfp, dmp); + + dmp->dm_flags = DT_DM_LOADED; /* fake up loaded bit */ + dmp->dm_modid = -1; /* no module ID */ + + if (ctf_import(dmp->dm_ctfp, dtp->dt_cdefs->dm_ctfp) == CTF_ERR) { + dt_dprintf("failed to import D parent container: %s\n", + ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + + /* + * Fill the dynamic "D" CTF container with all of the built-in typedefs + * that we need to use for our D variable and function definitions. + * This ensures that basic inttypes.h names are always available to us. 
+ */ + for (; dtyp->dty_src != NULL; dtyp++) { + if (ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT, + dtyp->dty_dst, ctf_lookup_by_name(dmp->dm_ctfp, + dtyp->dty_src)) == CTF_ERR) { + dt_dprintf("failed to add typedef %s %s to D " + "container: %s", dtyp->dty_src, dtyp->dty_dst, + ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + } + + /* + * Insert a CTF ID corresponding to a pointer to a type of kind + * CTF_K_FUNCTION we can use in the compiler for function pointers. + * CTF treats all function pointers as "int (*)()" so we only need one. + */ + ctc.ctc_return = ctf_lookup_by_name(dmp->dm_ctfp, "int"); + ctc.ctc_argc = 0; + ctc.ctc_flags = 0; + + dtp->dt_type_func = ctf_add_function(dmp->dm_ctfp, + CTF_ADD_ROOT, &ctc, NULL); + + dtp->dt_type_fptr = ctf_add_pointer(dmp->dm_ctfp, + CTF_ADD_ROOT, dtp->dt_type_func); + + /* + * We also insert CTF definitions for the special D intrinsic types + * string and <DYN> into the D container. The string type is added + * as a typedef of char[n]. The <DYN> type is an alias for void. + * We compare types to these special CTF ids throughout the compiler. 
+ */ + ctr.ctr_contents = ctf_lookup_by_name(dmp->dm_ctfp, "char"); + ctr.ctr_index = ctf_lookup_by_name(dmp->dm_ctfp, "long"); + ctr.ctr_nelems = _dtrace_strsize; + + dtp->dt_type_str = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT, + "string", ctf_add_array(dmp->dm_ctfp, CTF_ADD_ROOT, &ctr)); + + dtp->dt_type_dyn = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT, + "<DYN>", ctf_lookup_by_name(dmp->dm_ctfp, "void")); + + dtp->dt_type_stack = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT, + "stack", ctf_lookup_by_name(dmp->dm_ctfp, "void")); + + dtp->dt_type_symaddr = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT, + "_symaddr", ctf_lookup_by_name(dmp->dm_ctfp, "void")); + + dtp->dt_type_usymaddr = ctf_add_typedef(dmp->dm_ctfp, CTF_ADD_ROOT, + "_usymaddr", ctf_lookup_by_name(dmp->dm_ctfp, "void")); + + if (dtp->dt_type_func == CTF_ERR || dtp->dt_type_fptr == CTF_ERR || + dtp->dt_type_str == CTF_ERR || dtp->dt_type_dyn == CTF_ERR || + dtp->dt_type_stack == CTF_ERR || dtp->dt_type_symaddr == CTF_ERR || + dtp->dt_type_usymaddr == CTF_ERR) { + dt_dprintf("failed to add intrinsic to D container: %s\n", + ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + + if (ctf_update(dmp->dm_ctfp) != 0) { + dt_dprintf("failed update D container: %s\n", + ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + return (set_open_errno(dtp, errp, EDT_CTF)); + } + + /* + * Initialize the integer description table used to convert integer + * constants to the appropriate types. Refer to the comments above + * dt_node_int() for a complete description of how this table is used. 
+ */ + for (i = 0; i < sizeof (dtp->dt_ints) / sizeof (dtp->dt_ints[0]); i++) { + if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_EVERY, + dtp->dt_ints[i].did_name, &dtt) != 0) { + dt_dprintf("failed to lookup integer type %s: %s\n", + dtp->dt_ints[i].did_name, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + return (set_open_errno(dtp, errp, dtp->dt_errno)); + } + dtp->dt_ints[i].did_ctfp = dtt.dtt_ctfp; + dtp->dt_ints[i].did_type = dtt.dtt_type; + } + + /* + * Now that we've created the "C" and "D" containers, move them to the + * start of the module list so that these types and symbols are found + * first (for stability) when iterating through the module list. + */ + dt_list_delete(&dtp->dt_modlist, dtp->dt_ddefs); + dt_list_prepend(&dtp->dt_modlist, dtp->dt_ddefs); + + dt_list_delete(&dtp->dt_modlist, dtp->dt_cdefs); + dt_list_prepend(&dtp->dt_modlist, dtp->dt_cdefs); + + if (dt_pfdict_create(dtp) == -1) + return (set_open_errno(dtp, errp, dtp->dt_errno)); + + /* + * If we are opening libdtrace DTRACE_O_NODEV enable C_ZDEFS by default + * because without /dev/dtrace open, we will not be able to load the + * names and attributes of any providers or probes from the kernel. + */ + if (flags & DTRACE_O_NODEV) + dtp->dt_cflags |= DTRACE_C_ZDEFS; + + /* + * Load hard-wired inlines into the definition cache by calling the + * compiler on the raw definition string defined above. + */ + if ((pgp = dtrace_program_strcompile(dtp, _dtrace_hardwire, + DTRACE_PROBESPEC_NONE, DTRACE_C_EMPTY, 0, NULL)) == NULL) { + dt_dprintf("failed to load hard-wired definitions: %s\n", + dtrace_errmsg(dtp, dtrace_errno(dtp))); + return (set_open_errno(dtp, errp, EDT_HARDWIRE)); + } + + dt_program_destroy(dtp, pgp); + + /* + * Set up the default DTrace library path. Once set, the next call to + * dt_compile() will compile all the libraries. 
We intentionally defer + * library processing to improve overhead for clients that don't ever + * compile, and to provide better error reporting (because the full + * reporting of compiler errors requires dtrace_open() to succeed). + */ + if (dtrace_setopt(dtp, "libdir", _dtrace_libdir) != 0) + return (set_open_errno(dtp, errp, dtp->dt_errno)); + + return (dtp); +} + +dtrace_hdl_t * +dtrace_open(int version, int flags, int *errp) +{ + return (dt_vopen(version, flags, errp, NULL, NULL)); +} + +dtrace_hdl_t * +dtrace_vopen(int version, int flags, int *errp, + const dtrace_vector_t *vector, void *arg) +{ + return (dt_vopen(version, flags, errp, vector, arg)); +} + +void +dtrace_close(dtrace_hdl_t *dtp) +{ + dt_ident_t *idp, *ndp; + dt_module_t *dmp; + dt_provider_t *pvp; + dtrace_prog_t *pgp; + dt_xlator_t *dxp; + dt_dirpath_t *dirp; + int i; + + if (dtp->dt_procs != NULL) + dt_proc_hash_destroy(dtp); + + while ((pgp = dt_list_next(&dtp->dt_programs)) != NULL) + dt_program_destroy(dtp, pgp); + + while ((dxp = dt_list_next(&dtp->dt_xlators)) != NULL) + dt_xlator_destroy(dtp, dxp); + + dt_free(dtp, dtp->dt_xlatormap); + + for (idp = dtp->dt_externs; idp != NULL; idp = ndp) { + ndp = idp->di_next; + dt_ident_destroy(idp); + } + + if (dtp->dt_macros != NULL) + dt_idhash_destroy(dtp->dt_macros); + if (dtp->dt_aggs != NULL) + dt_idhash_destroy(dtp->dt_aggs); + if (dtp->dt_globals != NULL) + dt_idhash_destroy(dtp->dt_globals); + if (dtp->dt_tls != NULL) + dt_idhash_destroy(dtp->dt_tls); + + while ((dmp = dt_list_next(&dtp->dt_modlist)) != NULL) + dt_module_destroy(dtp, dmp); + + while ((pvp = dt_list_next(&dtp->dt_provlist)) != NULL) + dt_provider_destroy(dtp, pvp); + + if (dtp->dt_fd != -1) + (void) close(dtp->dt_fd); + if (dtp->dt_ftfd != -1) + (void) close(dtp->dt_ftfd); + if (dtp->dt_cdefs_fd != -1) + (void) close(dtp->dt_cdefs_fd); + if (dtp->dt_ddefs_fd != -1) + (void) close(dtp->dt_ddefs_fd); +#if defined(sun) + if (dtp->dt_stdout_fd != -1) + (void) 
close(dtp->dt_stdout_fd); +#else + if (dtp->dt_freopen_fp != NULL) + (void) fclose(dtp->dt_freopen_fp); +#endif + + dt_epid_destroy(dtp); + dt_aggid_destroy(dtp); + dt_format_destroy(dtp); + dt_strdata_destroy(dtp); + dt_buffered_destroy(dtp); + dt_aggregate_destroy(dtp); + dt_pfdict_destroy(dtp); + dt_provmod_destroy(&dtp->dt_provmod); + dt_dof_fini(dtp); + + for (i = 1; i < dtp->dt_cpp_argc; i++) + free(dtp->dt_cpp_argv[i]); + + while ((dirp = dt_list_next(&dtp->dt_lib_path)) != NULL) { + dt_list_delete(&dtp->dt_lib_path, dirp); + free(dirp->dir_path); + free(dirp); + } + + free(dtp->dt_cpp_argv); + free(dtp->dt_cpp_path); + free(dtp->dt_ld_path); + + free(dtp->dt_mods); + free(dtp->dt_provs); + free(dtp); +} + +int +dtrace_provider_modules(dtrace_hdl_t *dtp, const char **mods, int nmods) +{ + dt_provmod_t *prov; + int i = 0; + + for (prov = dtp->dt_provmod; prov != NULL; prov = prov->dp_next, i++) { + if (i < nmods) + mods[i] = prov->dp_name; + } + + return (i); +} + +int +dtrace_ctlfd(dtrace_hdl_t *dtp) +{ + return (dtp->dt_fd); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c new file mode 100644 index 0000000..832af88 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c @@ -0,0 +1,1087 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <sys/resource.h> +#include <sys/mman.h> +#include <sys/types.h> + +#include <strings.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <limits.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <errno.h> +#include <fcntl.h> + +#include <dt_impl.h> +#include <dt_string.h> + +static int +dt_opt_agg(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dt_aggregate_t *agp = &dtp->dt_aggregate; + + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + agp->dtat_flags |= option; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_amin(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char str[DTRACE_ATTR2STR_MAX]; + dtrace_attribute_t attr; + + if (arg == NULL || dtrace_str2attr(arg, &attr) == -1) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dt_dprintf("set compiler attribute minimum to %s\n", + dtrace_attr2str(attr, str, sizeof (str))); + + if (dtp->dt_pcb != NULL) { + dtp->dt_pcb->pcb_cflags |= DTRACE_C_EATTR; + dtp->dt_pcb->pcb_amin = attr; + } else { + dtp->dt_cflags |= DTRACE_C_EATTR; + dtp->dt_amin = attr; + } + + return (0); +} + +static void +dt_coredump(void) +{ + const char msg[] = "libdtrace DEBUG: [ forcing coredump ]\n"; + + struct sigaction act; + struct rlimit lim; + + (void) write(STDERR_FILENO, msg, sizeof (msg) - 1); + + act.sa_handler = SIG_DFL; + act.sa_flags = 0; + + (void) sigemptyset(&act.sa_mask); + (void) sigaction(SIGABRT, &act, NULL); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; 
+ + (void) setrlimit(RLIMIT_CORE, &lim); + abort(); +} + +/*ARGSUSED*/ +static int +dt_opt_core(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + static int enabled = 0; + + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (enabled++ || atexit(dt_coredump) == 0) + return (0); + + return (dt_set_errno(dtp, errno)); +} + +/*ARGSUSED*/ +static int +dt_opt_cpp_hdrs(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + return (dt_set_errno(dtp, EDT_BADOPTCTX)); + + if (dt_cpp_add_arg(dtp, "-H") == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_cpp_path(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char *cpp; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + return (dt_set_errno(dtp, EDT_BADOPTCTX)); + + if ((cpp = strdup(arg)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + dtp->dt_cpp_argv[0] = (char *)strbasename(cpp); + free(dtp->dt_cpp_path); + dtp->dt_cpp_path = cpp; + + return (0); +} + +static int +dt_opt_cpp_opts(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char *buf; + size_t len; + const char *opt = (const char *)option; + + if (opt == NULL || arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + return (dt_set_errno(dtp, EDT_BADOPTCTX)); + + len = strlen(opt) + strlen(arg) + 1; + buf = alloca(len); + + (void) strcpy(buf, opt); + (void) strcat(buf, arg); + + if (dt_cpp_add_arg(dtp, buf) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_ctypes(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + int fd; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if ((fd = open64(arg, O_CREAT | O_WRONLY, 0666)) == -1) + return (dt_set_errno(dtp, errno)); + + (void) close(dtp->dt_cdefs_fd); + 
dtp->dt_cdefs_fd = fd; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_droptags(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dtp->dt_droptags = 1; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_dtypes(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + int fd; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if ((fd = open64(arg, O_CREAT | O_WRONLY, 0666)) == -1) + return (dt_set_errno(dtp, errno)); + + (void) close(dtp->dt_ddefs_fd); + dtp->dt_ddefs_fd = fd; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_debug(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + _dtrace_debug = 1; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_iregs(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + int n; + + if (arg == NULL || (n = atoi(arg)) <= 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dtp->dt_conf.dtc_difintregs = n; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_lazyload(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dtp->dt_lazyload = 1; + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_ld_path(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char *ld; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + return (dt_set_errno(dtp, EDT_BADOPTCTX)); + + if ((ld = strdup(arg)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + free(dtp->dt_ld_path); + dtp->dt_ld_path = ld; + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_libdir(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dt_dirpath_t *dp; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if ((dp = malloc(sizeof (dt_dirpath_t))) == NULL || + (dp->dir_path = strdup(arg)) == NULL) { + free(dp); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + dt_list_append(&dtp->dt_lib_path, dp); + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_linkmode(dtrace_hdl_t *dtp, 
const char *arg, uintptr_t option) +{ + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (strcmp(arg, "kernel") == 0) + dtp->dt_linkmode = DT_LINK_KERNEL; + else if (strcmp(arg, "primary") == 0) + dtp->dt_linkmode = DT_LINK_PRIMARY; + else if (strcmp(arg, "dynamic") == 0) + dtp->dt_linkmode = DT_LINK_DYNAMIC; + else if (strcmp(arg, "static") == 0) + dtp->dt_linkmode = DT_LINK_STATIC; + else + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_linktype(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (strcasecmp(arg, "elf") == 0) + dtp->dt_linktype = DT_LTYP_ELF; + else if (strcasecmp(arg, "dof") == 0) + dtp->dt_linktype = DT_LTYP_DOF; + else + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_encoding(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (strcmp(arg, "ascii") == 0) + dtp->dt_encoding = DT_ENCODING_ASCII; + else if (strcmp(arg, "utf8") == 0) + dtp->dt_encoding = DT_ENCODING_UTF8; + else + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_evaltime(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (strcmp(arg, "exec") == 0) + dtp->dt_prcmode = DT_PROC_STOP_CREATE; + else if (strcmp(arg, "preinit") == 0) + dtp->dt_prcmode = DT_PROC_STOP_PREINIT; + else if (strcmp(arg, "postinit") == 0) + dtp->dt_prcmode = DT_PROC_STOP_POSTINIT; + else if (strcmp(arg, "main") == 0) + dtp->dt_prcmode = DT_PROC_STOP_MAIN; + else + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_pgmax(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + int n; + + if (arg == NULL || (n = atoi(arg)) < 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); 
+ + dtp->dt_procs->dph_lrulim = n; + return (0); +} + +/* + * Set (option != 0) or unset (option == 0) a variable in dtp->dt_proc_env, a + * NULL-terminated vector of environment strings. When setting, arg is the + * complete string to store; when unsetting, arg must be a bare name without + * '='. Every existing entry whose name equals the name in arg is removed + * first. Returns 0 on success or the result of dt_set_errno() on failure. + */ +static int +dt_opt_setenv(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char **p; + char *var; + size_t len; + int i, n; + + /* + * We can't effectively set environment variables from #pragma lines + * since the processes have already been spawned. + */ + if (dtp->dt_pcb != NULL) + return (dt_set_errno(dtp, EDT_BADOPTCTX)); + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (!option && strchr(arg, '=') != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + /* + * Only the name portion of arg (everything before '=', if any) takes + * part in matching against existing entries. + */ + var = strchr(arg, '='); + len = (var != NULL) ? (size_t)(var - arg) : strlen(arg); + + /* Count the live entries; dt_proc_env[n] is the NULL terminator. */ + for (n = 0; dtp->dt_proc_env[n] != NULL; n++) + continue; + + /* + * Remove every entry whose name is exactly the name in arg by moving + * the last live entry into its slot. The index is not advanced after + * a removal so that the entry just moved in is examined as well. + */ + for (i = 0; i < n; ) { + p = &dtp->dt_proc_env[i]; + + if (strncmp(*p, arg, len) != 0 || + ((*p)[len] != '=' && (*p)[len] != '\0')) { + i++; + continue; + } + + dt_free(dtp, *p); + *p = dtp->dt_proc_env[n - 1]; + dtp->dt_proc_env[n - 1] = NULL; + n--; + } + + if (option) { + if ((var = strdup(arg)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + /* Room for the n surviving entries, var, and the terminator. */ + if ((p = dt_alloc(dtp, sizeof (char *) * (n + 2))) == NULL) { + dt_free(dtp, var); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + bcopy(dtp->dt_proc_env, p, sizeof (char *) * n); + dt_free(dtp, dtp->dt_proc_env); + dtp->dt_proc_env = p; + + dtp->dt_proc_env[n] = var; + dtp->dt_proc_env[n + 1] = NULL; + } + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_stdc(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + return (dt_set_errno(dtp, EDT_BADOPTCTX)); + + if (strcmp(arg, "a") == 0) + dtp->dt_stdcmode = DT_STDC_XA; + else if (strcmp(arg, "c") == 0) + dtp->dt_stdcmode = DT_STDC_XC; + else if (strcmp(arg, "s") == 0) + dtp->dt_stdcmode = DT_STDC_XS; + else if (strcmp(arg, "t") == 0) + dtp->dt_stdcmode = DT_STDC_XT; + else + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_syslibdir(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dt_dirpath_t *dp = 
dt_list_next(&dtp->dt_lib_path); + char *path; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if ((path = strdup(arg)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + free(dp->dir_path); + dp->dir_path = path; + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_tree(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + int m; + + if (arg == NULL || (m = atoi(arg)) <= 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dtp->dt_treedump = m; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_tregs(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + int n; + + if (arg == NULL || (n = atoi(arg)) <= 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dtp->dt_conf.dtc_diftupregs = n; + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_xlate(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (strcmp(arg, "dynamic") == 0) + dtp->dt_xlatemode = DT_XL_DYNAMIC; + else if (strcmp(arg, "static") == 0) + dtp->dt_xlatemode = DT_XL_STATIC; + else + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_cflags(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + dtp->dt_pcb->pcb_cflags |= option; + else + dtp->dt_cflags |= option; + + return (0); +} + +static int +dt_opt_dflags(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dtp->dt_dflags |= option; + return (0); +} + +static int +dt_opt_invcflags(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + if (arg != NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dtp->dt_pcb != NULL) + dtp->dt_pcb->pcb_cflags &= ~option; + else + dtp->dt_cflags &= ~option; + + return (0); +} + +/*ARGSUSED*/ +static int +dt_opt_version(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + 
dt_version_t v; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (dt_version_str2num(arg, &v) == -1) + return (dt_set_errno(dtp, EDT_VERSINVAL)); + + if (!dt_version_defined(v)) + return (dt_set_errno(dtp, EDT_VERSUNDEF)); + + return (dt_reduce(dtp, v)); +} + +static int +dt_opt_runtime(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char *end; + dtrace_optval_t val = 0; + int i; + + const struct { + char *positive; + char *negative; + } couples[] = { + { "yes", "no" }, + { "enable", "disable" }, + { "enabled", "disabled" }, + { "true", "false" }, + { "on", "off" }, + { "set", "unset" }, + { NULL } + }; + + if (arg != NULL) { + if (arg[0] == '\0') { + val = DTRACEOPT_UNSET; + goto out; + } + + for (i = 0; couples[i].positive != NULL; i++) { + if (strcasecmp(couples[i].positive, arg) == 0) { + val = 1; + goto out; + } + + if (strcasecmp(couples[i].negative, arg) == 0) { + val = DTRACEOPT_UNSET; + goto out; + } + } + + errno = 0; + val = strtoull(arg, &end, 0); + + if (*end != '\0' || errno != 0 || val < 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + } + +out: + dtp->dt_options[option] = val; + return (0); +} + +/* + * Parse arg as an integer (base chosen by strtoull) followed by an optional + * one-character suffix k/K, m/M, g/G or t/T that multiplies the value by + * 1024 once, twice, three or four times respectively, and store the result + * in *rval. Returns 0 on success, or -1 if unexpected characters follow the + * number, the scaled value is negative, or strtoull set errno. + */ +static int +dt_optval_parse(const char *arg, dtrace_optval_t *rval) +{ + dtrace_optval_t mul = 1; + size_t len; + char *end; + + len = strlen(arg); + errno = 0; + + /* + * An empty string has no last character; do not index arg[-1]. It is + * treated as having no suffix and falls through to strtoull below. + */ + switch (len == 0 ? '\0' : arg[len - 1]) { + case 't': + case 'T': + mul *= 1024; + /*FALLTHRU*/ + case 'g': + case 'G': + mul *= 1024; + /*FALLTHRU*/ + case 'm': + case 'M': + mul *= 1024; + /*FALLTHRU*/ + case 'k': + case 'K': + mul *= 1024; + /*FALLTHRU*/ + default: + break; + } + + errno = 0; + *rval = strtoull(arg, &end, 0) * mul; + + /* + * With a suffix the number must end exactly at the suffix character; + * without one it must consume the whole string. + */ + if ((mul > 1 && end != &arg[len - 1]) || (mul == 1 && *end != '\0') || + *rval < 0 || errno != 0) + return (-1); + + return (0); +} + +static int +dt_opt_size(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dtrace_optval_t val = 0; + + if (arg != NULL && dt_optval_parse(arg, &val) != 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + 
+ dtp->dt_options[option] = val; + return (0); +} + +static int +dt_opt_rate(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + char *end; + int i; + dtrace_optval_t mul = 1, val = 0; + + const struct { + char *name; + hrtime_t mul; + } suffix[] = { + { "ns", NANOSEC / NANOSEC }, + { "nsec", NANOSEC / NANOSEC }, + { "us", NANOSEC / MICROSEC }, + { "usec", NANOSEC / MICROSEC }, + { "ms", NANOSEC / MILLISEC }, + { "msec", NANOSEC / MILLISEC }, + { "s", NANOSEC / SEC }, + { "sec", NANOSEC / SEC }, + { "m", NANOSEC * (hrtime_t)60 }, + { "min", NANOSEC * (hrtime_t)60 }, + { "h", NANOSEC * (hrtime_t)60 * (hrtime_t)60 }, + { "hour", NANOSEC * (hrtime_t)60 * (hrtime_t)60 }, + { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, + { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, + { "hz", 0 }, + { NULL } + }; + + if (arg != NULL) { + errno = 0; + val = strtoull(arg, &end, 0); + + for (i = 0; suffix[i].name != NULL; i++) { + if (strcasecmp(suffix[i].name, end) == 0) { + mul = suffix[i].mul; + break; + } + } + + if (suffix[i].name == NULL && *end != '\0' || val < 0) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + if (mul == 0) { + /* + * The rate has been specified in frequency-per-second. + */ + if (val != 0) + val = NANOSEC / val; + } else { + val *= mul; + } + } + + dtp->dt_options[option] = val; + return (0); +} + +/* + * When setting the strsize option, set the option in the dt_options array + * using dt_opt_size() as usual, and then update the definition of the CTF + * type for the D intrinsic "string" to be an array of the corresponding size. + * If any errors occur, reset dt_options[option] to its previous value. 
+ */ +static int +dt_opt_strsize(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dtrace_optval_t val = dtp->dt_options[option]; + ctf_file_t *fp = DT_STR_CTFP(dtp); + ctf_id_t type = ctf_type_resolve(fp, DT_STR_TYPE(dtp)); + ctf_arinfo_t r; + + if (dt_opt_size(dtp, arg, option) != 0) + return (-1); /* dt_errno is set for us */ + + if (dtp->dt_options[option] > UINT_MAX) { + dtp->dt_options[option] = val; + return (dt_set_errno(dtp, EOVERFLOW)); + } + + if (ctf_array_info(fp, type, &r) == CTF_ERR) { + dtp->dt_options[option] = val; + dtp->dt_ctferr = ctf_errno(fp); + return (dt_set_errno(dtp, EDT_CTF)); + } + + r.ctr_nelems = (uint_t)dtp->dt_options[option]; + + if (ctf_set_array(fp, type, &r) == CTF_ERR || + ctf_update(fp) == CTF_ERR) { + dtp->dt_options[option] = val; + dtp->dt_ctferr = ctf_errno(fp); + return (dt_set_errno(dtp, EDT_CTF)); + } + + return (0); +} + +static const struct { + const char *dtbp_name; + int dtbp_policy; +} _dtrace_bufpolicies[] = { + { "ring", DTRACEOPT_BUFPOLICY_RING }, + { "fill", DTRACEOPT_BUFPOLICY_FILL }, + { "switch", DTRACEOPT_BUFPOLICY_SWITCH }, + { NULL, 0 } +}; + +/*ARGSUSED*/ +static int +dt_opt_bufpolicy(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + dtrace_optval_t policy = DTRACEOPT_UNSET; + int i; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + for (i = 0; _dtrace_bufpolicies[i].dtbp_name != NULL; i++) { + if (strcmp(_dtrace_bufpolicies[i].dtbp_name, arg) == 0) { + policy = _dtrace_bufpolicies[i].dtbp_policy; + break; + } + } + + if (policy == DTRACEOPT_UNSET) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dtp->dt_options[DTRACEOPT_BUFPOLICY] = policy; + + return (0); +} + +static const struct { + const char *dtbr_name; + int dtbr_policy; +} _dtrace_bufresize[] = { + { "auto", DTRACEOPT_BUFRESIZE_AUTO }, + { "manual", DTRACEOPT_BUFRESIZE_MANUAL }, + { NULL, 0 } +}; + +/*ARGSUSED*/ +static int +dt_opt_bufresize(dtrace_hdl_t *dtp, const char *arg, uintptr_t option) +{ + 
dtrace_optval_t policy = DTRACEOPT_UNSET; + int i; + + if (arg == NULL) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + for (i = 0; _dtrace_bufresize[i].dtbr_name != NULL; i++) { + if (strcmp(_dtrace_bufresize[i].dtbr_name, arg) == 0) { + policy = _dtrace_bufresize[i].dtbr_policy; + break; + } + } + + if (policy == DTRACEOPT_UNSET) + return (dt_set_errno(dtp, EDT_BADOPTVAL)); + + dtp->dt_options[DTRACEOPT_BUFRESIZE] = policy; + + return (0); +} + +/* + * Load option values into dtp->dt_options from the DOF returned by the + * DTRACEIOC_DOFGET ioctl. Once the DOF size is known every option is reset + * to DTRACEOPT_UNSET, and then each entry of the first OPTDESC section that + * has no string table and a valid option index is copied in. Returns 0 on + * success (including when the DOF has no OPTDESC section), or the result of + * dt_set_errno() if an ioctl fails or the reported size is too small. + */ +int +dt_options_load(dtrace_hdl_t *dtp) +{ + dof_hdr_t hdr, *dof; + dof_sec_t *sec = NULL; + size_t offs; + int i; + + /* + * To load the option values, we need to ask the kernel to provide its + * DOF, which we'll sift through to look for OPTDESC sections. + */ + bzero(&hdr, sizeof (dof_hdr_t)); + hdr.dofh_loadsz = sizeof (dof_hdr_t); + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_DOFGET, &hdr) == -1) +#else + dof = &hdr; + if (dt_ioctl(dtp, DTRACEIOC_DOFGET, &dof) == -1) +#endif + return (dt_set_errno(dtp, errno)); + + if (hdr.dofh_loadsz < sizeof (dof_hdr_t)) + return (dt_set_errno(dtp, EINVAL)); + + dof = alloca(hdr.dofh_loadsz); + bzero(dof, sizeof (dof_hdr_t)); + dof->dofh_loadsz = hdr.dofh_loadsz; + + for (i = 0; i < DTRACEOPT_MAX; i++) + dtp->dt_options[i] = DTRACEOPT_UNSET; + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_DOFGET, dof) == -1) +#else + if (dt_ioctl(dtp, DTRACEIOC_DOFGET, &dof) == -1) +#endif + return (dt_set_errno(dtp, errno)); + + /* Find the first OPTDESC section; sec stays NULL if there is none. */ + for (i = 0; i < dof->dofh_secnum; i++) { + dof_sec_t *s = (dof_sec_t *)(uintptr_t)((uintptr_t)dof + + dof->dofh_secoff + i * dof->dofh_secsize); + + if (s->dofs_type != DOF_SECT_OPTDESC) + continue; + + sec = s; + break; + } + + /* + * Without an OPTDESC section there is nothing to load; every option + * keeps the DTRACEOPT_UNSET value assigned above. + */ + if (sec == NULL) + return (0); + + for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { + dof_optdesc_t *opt = (dof_optdesc_t *)(uintptr_t) + ((uintptr_t)dof + sec->dofs_offset + offs); + + if (opt->dofo_strtab != DOF_SECIDX_NONE) + continue; + + if (opt->dofo_option >= DTRACEOPT_MAX) + continue; + + dtp->dt_options[opt->dofo_option] = opt->dofo_value; + } + + return (0); +} + +typedef 
struct dt_option { + const char *o_name; + int (*o_func)(dtrace_hdl_t *, const char *, uintptr_t); + uintptr_t o_option; +} dt_option_t; + +/* + * Compile-time options. + */ +static const dt_option_t _dtrace_ctoptions[] = { + { "aggpercpu", dt_opt_agg, DTRACE_A_PERCPU }, + { "amin", dt_opt_amin }, + { "argref", dt_opt_cflags, DTRACE_C_ARGREF }, + { "core", dt_opt_core }, + { "cpp", dt_opt_cflags, DTRACE_C_CPP }, + { "cpphdrs", dt_opt_cpp_hdrs }, + { "cpppath", dt_opt_cpp_path }, + { "ctypes", dt_opt_ctypes }, + { "defaultargs", dt_opt_cflags, DTRACE_C_DEFARG }, + { "dtypes", dt_opt_dtypes }, + { "debug", dt_opt_debug }, + { "define", dt_opt_cpp_opts, (uintptr_t)"-D" }, + { "droptags", dt_opt_droptags }, + { "empty", dt_opt_cflags, DTRACE_C_EMPTY }, + { "encoding", dt_opt_encoding }, + { "errtags", dt_opt_cflags, DTRACE_C_ETAGS }, + { "evaltime", dt_opt_evaltime }, + { "incdir", dt_opt_cpp_opts, (uintptr_t)"-I" }, + { "iregs", dt_opt_iregs }, + { "kdefs", dt_opt_invcflags, DTRACE_C_KNODEF }, + { "knodefs", dt_opt_cflags, DTRACE_C_KNODEF }, + { "late", dt_opt_xlate }, + { "lazyload", dt_opt_lazyload }, + { "ldpath", dt_opt_ld_path }, + { "libdir", dt_opt_libdir }, + { "linkmode", dt_opt_linkmode }, + { "linktype", dt_opt_linktype }, + { "nolibs", dt_opt_cflags, DTRACE_C_NOLIBS }, + { "pgmax", dt_opt_pgmax }, + { "pspec", dt_opt_cflags, DTRACE_C_PSPEC }, + { "setenv", dt_opt_setenv, 1 }, + { "stdc", dt_opt_stdc }, + { "strip", dt_opt_dflags, DTRACE_D_STRIP }, + { "syslibdir", dt_opt_syslibdir }, + { "tree", dt_opt_tree }, + { "tregs", dt_opt_tregs }, + { "udefs", dt_opt_invcflags, DTRACE_C_UNODEF }, + { "undef", dt_opt_cpp_opts, (uintptr_t)"-U" }, + { "unodefs", dt_opt_cflags, DTRACE_C_UNODEF }, + { "unsetenv", dt_opt_setenv, 0 }, + { "verbose", dt_opt_cflags, DTRACE_C_DIFV }, + { "version", dt_opt_version }, + { "zdefs", dt_opt_cflags, DTRACE_C_ZDEFS }, + { NULL, NULL, 0 } +}; + +/* + * Run-time options. 
+ */ +static const dt_option_t _dtrace_rtoptions[] = { + { "aggsize", dt_opt_size, DTRACEOPT_AGGSIZE }, + { "bufsize", dt_opt_size, DTRACEOPT_BUFSIZE }, + { "bufpolicy", dt_opt_bufpolicy, DTRACEOPT_BUFPOLICY }, + { "bufresize", dt_opt_bufresize, DTRACEOPT_BUFRESIZE }, + { "cleanrate", dt_opt_rate, DTRACEOPT_CLEANRATE }, + { "cpu", dt_opt_runtime, DTRACEOPT_CPU }, + { "destructive", dt_opt_runtime, DTRACEOPT_DESTRUCTIVE }, + { "dynvarsize", dt_opt_size, DTRACEOPT_DYNVARSIZE }, + { "grabanon", dt_opt_runtime, DTRACEOPT_GRABANON }, + { "jstackframes", dt_opt_runtime, DTRACEOPT_JSTACKFRAMES }, + { "jstackstrsize", dt_opt_size, DTRACEOPT_JSTACKSTRSIZE }, + { "nspec", dt_opt_runtime, DTRACEOPT_NSPEC }, + { "specsize", dt_opt_size, DTRACEOPT_SPECSIZE }, + { "stackframes", dt_opt_runtime, DTRACEOPT_STACKFRAMES }, + { "statusrate", dt_opt_rate, DTRACEOPT_STATUSRATE }, + { "strsize", dt_opt_strsize, DTRACEOPT_STRSIZE }, + { "ustackframes", dt_opt_runtime, DTRACEOPT_USTACKFRAMES }, + { "temporal", dt_opt_runtime, DTRACEOPT_TEMPORAL }, + { NULL, NULL, 0 } +}; + +/* + * Dynamic run-time options. 
+ */ +static const dt_option_t _dtrace_drtoptions[] = { + { "agghist", dt_opt_runtime, DTRACEOPT_AGGHIST }, + { "aggpack", dt_opt_runtime, DTRACEOPT_AGGPACK }, + { "aggrate", dt_opt_rate, DTRACEOPT_AGGRATE }, + { "aggsortkey", dt_opt_runtime, DTRACEOPT_AGGSORTKEY }, + { "aggsortkeypos", dt_opt_runtime, DTRACEOPT_AGGSORTKEYPOS }, + { "aggsortpos", dt_opt_runtime, DTRACEOPT_AGGSORTPOS }, + { "aggsortrev", dt_opt_runtime, DTRACEOPT_AGGSORTREV }, + { "aggzoom", dt_opt_runtime, DTRACEOPT_AGGZOOM }, + { "flowindent", dt_opt_runtime, DTRACEOPT_FLOWINDENT }, + { "quiet", dt_opt_runtime, DTRACEOPT_QUIET }, + { "rawbytes", dt_opt_runtime, DTRACEOPT_RAWBYTES }, + { "stackindent", dt_opt_runtime, DTRACEOPT_STACKINDENT }, + { "switchrate", dt_opt_rate, DTRACEOPT_SWITCHRATE }, + { NULL, NULL, 0 } +}; + +int +dtrace_getopt(dtrace_hdl_t *dtp, const char *opt, dtrace_optval_t *val) +{ + const dt_option_t *op; + + if (opt == NULL) + return (dt_set_errno(dtp, EINVAL)); + + /* + * We only need to search the run-time options -- it's not legal + * to get the values of compile-time options. 
+ */ + for (op = _dtrace_rtoptions; op->o_name != NULL; op++) { + if (strcmp(op->o_name, opt) == 0) { + *val = dtp->dt_options[op->o_option]; + return (0); + } + } + + for (op = _dtrace_drtoptions; op->o_name != NULL; op++) { + if (strcmp(op->o_name, opt) == 0) { + *val = dtp->dt_options[op->o_option]; + return (0); + } + } + + return (dt_set_errno(dtp, EDT_BADOPTNAME)); +} + +int +dtrace_setopt(dtrace_hdl_t *dtp, const char *opt, const char *val) +{ + const dt_option_t *op; + + if (opt == NULL) + return (dt_set_errno(dtp, EINVAL)); + + for (op = _dtrace_ctoptions; op->o_name != NULL; op++) { + if (strcmp(op->o_name, opt) == 0) + return (op->o_func(dtp, val, op->o_option)); + } + + for (op = _dtrace_drtoptions; op->o_name != NULL; op++) { + if (strcmp(op->o_name, opt) == 0) + return (op->o_func(dtp, val, op->o_option)); + } + + for (op = _dtrace_rtoptions; op->o_name != NULL; op++) { + if (strcmp(op->o_name, opt) == 0) { + /* + * Only dynamic run-time options may be set while + * tracing is active. + */ + if (dtp->dt_active) + return (dt_set_errno(dtp, EDT_ACTIVE)); + + return (op->o_func(dtp, val, op->o_option)); + } + } + + return (dt_set_errno(dtp, EDT_BADOPTNAME)); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_parser.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_parser.c new file mode 100644 index 0000000..6ce3dad --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_parser.c @@ -0,0 +1,4985 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013, Joyent Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * DTrace D Language Parser + * + * The D Parser is a lex/yacc parser consisting of the lexer dt_lex.l, the + * parsing grammar dt_grammar.y, and this file, dt_parser.c, which handles + * the construction of the parse tree nodes and their syntactic validation. + * The parse tree is constructed of dt_node_t structures (see <dt_parser.h>) + * that are built in two passes: (1) the "create" pass, where the parse tree + * nodes are allocated by calls from the grammar to dt_node_*() subroutines, + * and (2) the "cook" pass, where nodes are coalesced, assigned D types, and + * validated according to the syntactic rules of the language. + * + * All node allocations are performed using dt_node_alloc(). All node frees + * during the parsing phase are performed by dt_node_free(), which frees node- + * internal state but does not actually free the nodes. All final node frees + * are done as part of the end of dt_compile() or as part of destroying + * persistent identifiers or translators which have embedded nodes. + * + * The dt_node_* routines that implement pass (1) may allocate new nodes. The + * dt_cook_* routines that implement pass (2) may *not* allocate new nodes. + * They may free existing nodes using dt_node_free(), but they may not actually + * deallocate any dt_node_t's. 
Currently dt_cook_op2() is an exception to this + * rule: see the comments therein for how this issue is resolved. + * + * The dt_cook_* routines are responsible for (at minimum) setting the final + * node type (dn_ctfp/dn_type) and attributes (dn_attr). If dn_ctfp/dn_type + * are set manually (i.e. not by one of the type assignment functions), then + * the DT_NF_COOKED flag must be set manually on the node. + * + * The cooking pass can be applied to the same parse tree more than once (used + * in the case of a comma-separated list of probe descriptions). As such, the + * cook routines must not perform any parse tree transformations which would + * be invalid if the tree were subsequently cooked using a different context. + * + * The dn_ctfp and dn_type fields form the type of the node. This tuple can + * take on the following set of values, which form our type invariants: + * + * 1. dn_ctfp = NULL, dn_type = CTF_ERR + * + * In this state, the node has unknown type and is not yet cooked. The + * DT_NF_COOKED flag is not yet set on the node. + * + * 2. dn_ctfp = DT_DYN_CTFP(dtp), dn_type = DT_DYN_TYPE(dtp) + * + * In this state, the node is a dynamic D type. This means that generic + * operations are not valid on this node and only code that knows how to + * examine the inner details of the node can operate on it. A <DYN> node + * must have dn_ident set to point to an identifier describing the object + * and its type. The DT_NF_REF flag is set for all nodes of type <DYN>. + * At present, the D compiler uses the <DYN> type for: + * + * - associative arrays that do not yet have a value type defined + * - translated data (i.e. the result of the xlate operator) + * - aggregations + * + * 3. dn_ctfp = DT_STR_CTFP(dtp), dn_type = DT_STR_TYPE(dtp) + * + * In this state, the node is of type D string. The string type is really + * a char[0] typedef, but requires special handling throughout the compiler. + * + * 4. 
dn_ctfp != NULL, dn_type = any other type ID + * + * In this state, the node is of some known D/CTF type. The normal libctf + * APIs can be used to learn more about the type name or structure. When + * the type is assigned, the DT_NF_SIGNED, DT_NF_REF, and DT_NF_BITFIELD + * flags cache the corresponding attributes of the underlying CTF type. + */ + +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <limits.h> +#include <setjmp.h> +#include <strings.h> +#include <assert.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include <errno.h> +#include <ctype.h> + +#include <dt_impl.h> +#include <dt_grammar.h> +#include <dt_module.h> +#include <dt_provider.h> +#include <dt_string.h> +#include <dt_as.h> + +dt_pcb_t *yypcb; /* current control block for parser */ +dt_node_t *yypragma; /* lex token list for control lines */ +char yyintprefix; /* int token macro prefix (+/-) */ +char yyintsuffix[4]; /* int token suffix string [uU][lL] */ +int yyintdecimal; /* int token format flag (1=decimal, 0=octal/hex) */ + +static const char * +opstr(int op) +{ + switch (op) { + case DT_TOK_COMMA: return (","); + case DT_TOK_ELLIPSIS: return ("..."); + case DT_TOK_ASGN: return ("="); + case DT_TOK_ADD_EQ: return ("+="); + case DT_TOK_SUB_EQ: return ("-="); + case DT_TOK_MUL_EQ: return ("*="); + case DT_TOK_DIV_EQ: return ("/="); + case DT_TOK_MOD_EQ: return ("%="); + case DT_TOK_AND_EQ: return ("&="); + case DT_TOK_XOR_EQ: return ("^="); + case DT_TOK_OR_EQ: return ("|="); + case DT_TOK_LSH_EQ: return ("<<="); + case DT_TOK_RSH_EQ: return (">>="); + case DT_TOK_QUESTION: return ("?"); + case DT_TOK_COLON: return (":"); + case DT_TOK_LOR: return ("||"); + case DT_TOK_LXOR: return ("^^"); + case DT_TOK_LAND: return ("&&"); + case DT_TOK_BOR: return ("|"); + case DT_TOK_XOR: return ("^"); + case DT_TOK_BAND: return ("&"); + case DT_TOK_EQU: return ("=="); + case DT_TOK_NEQ: return ("!="); + case DT_TOK_LT: return 
("<"); + case DT_TOK_LE: return ("<="); + case DT_TOK_GT: return (">"); + case DT_TOK_GE: return (">="); + case DT_TOK_LSH: return ("<<"); + case DT_TOK_RSH: return (">>"); + case DT_TOK_ADD: return ("+"); + case DT_TOK_SUB: return ("-"); + case DT_TOK_MUL: return ("*"); + case DT_TOK_DIV: return ("/"); + case DT_TOK_MOD: return ("%"); + case DT_TOK_LNEG: return ("!"); + case DT_TOK_BNEG: return ("~"); + case DT_TOK_ADDADD: return ("++"); + case DT_TOK_PREINC: return ("++"); + case DT_TOK_POSTINC: return ("++"); + case DT_TOK_SUBSUB: return ("--"); + case DT_TOK_PREDEC: return ("--"); + case DT_TOK_POSTDEC: return ("--"); + case DT_TOK_IPOS: return ("+"); + case DT_TOK_INEG: return ("-"); + case DT_TOK_DEREF: return ("*"); + case DT_TOK_ADDROF: return ("&"); + case DT_TOK_OFFSETOF: return ("offsetof"); + case DT_TOK_SIZEOF: return ("sizeof"); + case DT_TOK_STRINGOF: return ("stringof"); + case DT_TOK_XLATE: return ("xlate"); + case DT_TOK_LPAR: return ("("); + case DT_TOK_RPAR: return (")"); + case DT_TOK_LBRAC: return ("["); + case DT_TOK_RBRAC: return ("]"); + case DT_TOK_PTR: return ("->"); + case DT_TOK_DOT: return ("."); + case DT_TOK_STRING: return ("<string>"); + case DT_TOK_IDENT: return ("<ident>"); + case DT_TOK_TNAME: return ("<type>"); + case DT_TOK_INT: return ("<int>"); + default: return ("<?>"); + } +} + +int +dt_type_lookup(const char *s, dtrace_typeinfo_t *tip) +{ + static const char delimiters[] = " \t\n\r\v\f*`"; + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + const char *p, *q, *r, *end, *obj; + + for (p = s, end = s + strlen(s); *p != '\0'; p = q) { + while (isspace(*p)) + p++; /* skip leading whitespace prior to token */ + + if (p == end || (q = strpbrk(p + 1, delimiters)) == NULL) + break; /* empty string or single token remaining */ + + if (*q == '`') { + char *object = alloca((size_t)(q - p) + 1); + char *type = alloca((size_t)(end - s) + 1); + + /* + * Copy from the start of the token (p) to the location + * backquote (q) to extract the 
nul-terminated object. + */ + bcopy(p, object, (size_t)(q - p)); + object[(size_t)(q - p)] = '\0'; + + /* + * Copy the original string up to the start of this + * token (p) into type, and then concatenate everything + * after q. This is the type name without the object. + */ + bcopy(s, type, (size_t)(p - s)); + bcopy(q + 1, type + (size_t)(p - s), strlen(q + 1) + 1); + + /* + * There may be at most three delimiters. The second + * delimiter is usually used to distinguish the type + * within a given module; however, there could be a link + * map id on the scene, in which case that delimiter + * would be the third. We determine presence of the lmid + * if it roughly matches the form LM[0-9]. + */ + if ((r = strchr(q + 1, '`')) != NULL && + ((r = strchr(r + 1, '`')) != NULL)) { + if (strchr(r + 1, '`') != NULL) + return (dt_set_errno(dtp, + EDT_BADSCOPE)); + if (q[1] != 'L' || q[2] != 'M') + return (dt_set_errno(dtp, + EDT_BADSCOPE)); + } + + return (dtrace_lookup_by_type(dtp, object, type, tip)); + } + } + + if (yypcb->pcb_idepth != 0) + obj = DTRACE_OBJ_CDEFS; + else + obj = DTRACE_OBJ_EVERY; + + return (dtrace_lookup_by_type(dtp, obj, s, tip)); +} + +/* + * When we parse type expressions or parse an expression with unary "&", we + * need to find a type that is a pointer to a previously known type. + * Unfortunately CTF is limited to a per-container view, so ctf_type_pointer() + * alone does not suffice for our needs. We provide a more intelligent wrapper + * for the compiler that attempts to compute a pointer to either the given type + * or its base (that is, we try both "foo_t *" and "struct foo *"), and also + * to potentially construct the required type on-the-fly. 
+ */ +int +dt_type_pointer(dtrace_typeinfo_t *tip) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + ctf_file_t *ctfp = tip->dtt_ctfp; + ctf_id_t type = tip->dtt_type; + ctf_id_t base = ctf_type_resolve(ctfp, type); + uint_t bflags = tip->dtt_flags; + + dt_module_t *dmp; + ctf_id_t ptr; + + if ((ptr = ctf_type_pointer(ctfp, type)) != CTF_ERR || + (ptr = ctf_type_pointer(ctfp, base)) != CTF_ERR) { + tip->dtt_type = ptr; + return (0); + } + + if (yypcb->pcb_idepth != 0) + dmp = dtp->dt_cdefs; + else + dmp = dtp->dt_ddefs; + + if (ctfp != dmp->dm_ctfp && ctfp != ctf_parent_file(dmp->dm_ctfp) && + (type = ctf_add_type(dmp->dm_ctfp, ctfp, type)) == CTF_ERR) { + dtp->dt_ctferr = ctf_errno(dmp->dm_ctfp); + return (dt_set_errno(dtp, EDT_CTF)); + } + + ptr = ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, type); + + if (ptr == CTF_ERR || ctf_update(dmp->dm_ctfp) == CTF_ERR) { + dtp->dt_ctferr = ctf_errno(dmp->dm_ctfp); + return (dt_set_errno(dtp, EDT_CTF)); + } + + tip->dtt_object = dmp->dm_name; + tip->dtt_ctfp = dmp->dm_ctfp; + tip->dtt_type = ptr; + tip->dtt_flags = bflags; + + return (0); +} + +const char * +dt_type_name(ctf_file_t *ctfp, ctf_id_t type, char *buf, size_t len) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + if (ctfp == DT_FPTR_CTFP(dtp) && type == DT_FPTR_TYPE(dtp)) + (void) snprintf(buf, len, "function pointer"); + else if (ctfp == DT_FUNC_CTFP(dtp) && type == DT_FUNC_TYPE(dtp)) + (void) snprintf(buf, len, "function"); + else if (ctfp == DT_DYN_CTFP(dtp) && type == DT_DYN_TYPE(dtp)) + (void) snprintf(buf, len, "dynamic variable"); + else if (ctfp == NULL) + (void) snprintf(buf, len, "<none>"); + else if (ctf_type_name(ctfp, type, buf, len) == NULL) + (void) snprintf(buf, len, "unknown"); + + return (buf); +} + +/* + * Perform the "usual arithmetic conversions" to determine which of the two + * input operand types should be promoted and used as a result type. The + * rules for this are described in ISOC[6.3.1.8] and K&R[A6.5]. 
+ */ +static void +dt_type_promote(dt_node_t *lp, dt_node_t *rp, ctf_file_t **ofp, ctf_id_t *otype) +{ + ctf_file_t *lfp = lp->dn_ctfp; + ctf_id_t ltype = lp->dn_type; + + ctf_file_t *rfp = rp->dn_ctfp; + ctf_id_t rtype = rp->dn_type; + + ctf_id_t lbase = ctf_type_resolve(lfp, ltype); + uint_t lkind = ctf_type_kind(lfp, lbase); + + ctf_id_t rbase = ctf_type_resolve(rfp, rtype); + uint_t rkind = ctf_type_kind(rfp, rbase); + + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + ctf_encoding_t le, re; + uint_t lrank, rrank; + + assert(lkind == CTF_K_INTEGER || lkind == CTF_K_ENUM); + assert(rkind == CTF_K_INTEGER || rkind == CTF_K_ENUM); + + if (lkind == CTF_K_ENUM) { + lfp = DT_INT_CTFP(dtp); + ltype = lbase = DT_INT_TYPE(dtp); + } + + if (rkind == CTF_K_ENUM) { + rfp = DT_INT_CTFP(dtp); + rtype = rbase = DT_INT_TYPE(dtp); + } + + if (ctf_type_encoding(lfp, lbase, &le) == CTF_ERR) { + yypcb->pcb_hdl->dt_ctferr = ctf_errno(lfp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } + + if (ctf_type_encoding(rfp, rbase, &re) == CTF_ERR) { + yypcb->pcb_hdl->dt_ctferr = ctf_errno(rfp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } + + /* + * Compute an integer rank based on the size and unsigned status. + * If rank is identical, pick the "larger" of the equivalent types + * which we define as having a larger base ctf_id_t. If rank is + * different, pick the type with the greater rank. 
+ */ + lrank = le.cte_bits + ((le.cte_format & CTF_INT_SIGNED) == 0); + rrank = re.cte_bits + ((re.cte_format & CTF_INT_SIGNED) == 0); + + if (lrank == rrank) { + if (lbase - rbase < 0) + goto return_rtype; + else + goto return_ltype; + } else if (lrank > rrank) { + goto return_ltype; + } else + goto return_rtype; + +return_ltype: + *ofp = lfp; + *otype = ltype; + return; + +return_rtype: + *ofp = rfp; + *otype = rtype; +} + +void +dt_node_promote(dt_node_t *lp, dt_node_t *rp, dt_node_t *dnp) +{ + dt_type_promote(lp, rp, &dnp->dn_ctfp, &dnp->dn_type); + dt_node_type_assign(dnp, dnp->dn_ctfp, dnp->dn_type, B_FALSE); + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); +} + +const char * +dt_node_name(const dt_node_t *dnp, char *buf, size_t len) +{ + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + const char *prefix = "", *suffix = ""; + const dtrace_syminfo_t *dts; + char *s; + + switch (dnp->dn_kind) { + case DT_NODE_INT: + (void) snprintf(buf, len, "integer constant 0x%llx", + (u_longlong_t)dnp->dn_value); + break; + case DT_NODE_STRING: + s = strchr2esc(dnp->dn_string, strlen(dnp->dn_string)); + (void) snprintf(buf, len, "string constant \"%s\"", + s != NULL ? 
s : dnp->dn_string); + free(s); + break; + case DT_NODE_IDENT: + (void) snprintf(buf, len, "identifier %s", dnp->dn_string); + break; + case DT_NODE_VAR: + case DT_NODE_FUNC: + case DT_NODE_AGG: + case DT_NODE_INLINE: + switch (dnp->dn_ident->di_kind) { + case DT_IDENT_FUNC: + case DT_IDENT_AGGFUNC: + case DT_IDENT_ACTFUNC: + suffix = "( )"; + break; + case DT_IDENT_AGG: + prefix = "@"; + break; + } + (void) snprintf(buf, len, "%s %s%s%s", + dt_idkind_name(dnp->dn_ident->di_kind), + prefix, dnp->dn_ident->di_name, suffix); + break; + case DT_NODE_SYM: + dts = dnp->dn_ident->di_data; + (void) snprintf(buf, len, "symbol %s`%s", + dts->dts_object, dts->dts_name); + break; + case DT_NODE_TYPE: + (void) snprintf(buf, len, "type %s", + dt_node_type_name(dnp, n1, sizeof (n1))); + break; + case DT_NODE_OP1: + case DT_NODE_OP2: + case DT_NODE_OP3: + (void) snprintf(buf, len, "operator %s", opstr(dnp->dn_op)); + break; + case DT_NODE_DEXPR: + case DT_NODE_DFUNC: + if (dnp->dn_expr) + return (dt_node_name(dnp->dn_expr, buf, len)); + (void) snprintf(buf, len, "%s", "statement"); + break; + case DT_NODE_PDESC: + if (dnp->dn_desc->dtpd_id == 0) { + (void) snprintf(buf, len, + "probe description %s:%s:%s:%s", + dnp->dn_desc->dtpd_provider, dnp->dn_desc->dtpd_mod, + dnp->dn_desc->dtpd_func, dnp->dn_desc->dtpd_name); + } else { + (void) snprintf(buf, len, "probe description %u", + dnp->dn_desc->dtpd_id); + } + break; + case DT_NODE_CLAUSE: + (void) snprintf(buf, len, "%s", "clause"); + break; + case DT_NODE_MEMBER: + (void) snprintf(buf, len, "member %s", dnp->dn_membname); + break; + case DT_NODE_XLATOR: + (void) snprintf(buf, len, "translator <%s> (%s)", + dt_type_name(dnp->dn_xlator->dx_dst_ctfp, + dnp->dn_xlator->dx_dst_type, n1, sizeof (n1)), + dt_type_name(dnp->dn_xlator->dx_src_ctfp, + dnp->dn_xlator->dx_src_type, n2, sizeof (n2))); + break; + case DT_NODE_PROG: + (void) snprintf(buf, len, "%s", "program"); + break; + default: + (void) snprintf(buf, len, "node <%u>", 
dnp->dn_kind); + break; + } + + return (buf); +} + +/* + * dt_node_xalloc() can be used to create new parse nodes from any libdtrace + * caller. The caller is responsible for assigning dn_link appropriately. + */ +dt_node_t * +dt_node_xalloc(dtrace_hdl_t *dtp, int kind) +{ + dt_node_t *dnp = dt_alloc(dtp, sizeof (dt_node_t)); + + if (dnp == NULL) + return (NULL); + + dnp->dn_ctfp = NULL; + dnp->dn_type = CTF_ERR; + dnp->dn_kind = (uchar_t)kind; + dnp->dn_flags = 0; + dnp->dn_op = 0; + dnp->dn_line = -1; + dnp->dn_reg = -1; + dnp->dn_attr = _dtrace_defattr; + dnp->dn_list = NULL; + dnp->dn_link = NULL; + bzero(&dnp->dn_u, sizeof (dnp->dn_u)); + + return (dnp); +} + +/* + * dt_node_alloc() is used to create new parse nodes from the parser. It + * assigns the node location based on the current lexer line number and places + * the new node on the default allocation list. If allocation fails, we + * automatically longjmp the caller back to the enclosing compilation call. + */ +static dt_node_t * +dt_node_alloc(int kind) +{ + dt_node_t *dnp = dt_node_xalloc(yypcb->pcb_hdl, kind); + + if (dnp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dnp->dn_line = yylineno; + dnp->dn_link = yypcb->pcb_list; + yypcb->pcb_list = dnp; + + return (dnp); +} + +void +dt_node_free(dt_node_t *dnp) +{ + uchar_t kind = dnp->dn_kind; + + dnp->dn_kind = DT_NODE_FREE; + + switch (kind) { + case DT_NODE_STRING: + case DT_NODE_IDENT: + case DT_NODE_TYPE: + free(dnp->dn_string); + dnp->dn_string = NULL; + break; + + case DT_NODE_VAR: + case DT_NODE_FUNC: + case DT_NODE_PROBE: + if (dnp->dn_ident != NULL) { + if (dnp->dn_ident->di_flags & DT_IDFLG_ORPHAN) + dt_ident_destroy(dnp->dn_ident); + dnp->dn_ident = NULL; + } + dt_node_list_free(&dnp->dn_args); + break; + + case DT_NODE_OP1: + if (dnp->dn_child != NULL) { + dt_node_free(dnp->dn_child); + dnp->dn_child = NULL; + } + break; + + case DT_NODE_OP3: + if (dnp->dn_expr != NULL) { + dt_node_free(dnp->dn_expr); + dnp->dn_expr = NULL; + } + 
/*FALLTHRU*/ + case DT_NODE_OP2: + if (dnp->dn_left != NULL) { + dt_node_free(dnp->dn_left); + dnp->dn_left = NULL; + } + if (dnp->dn_right != NULL) { + dt_node_free(dnp->dn_right); + dnp->dn_right = NULL; + } + break; + + case DT_NODE_DEXPR: + case DT_NODE_DFUNC: + if (dnp->dn_expr != NULL) { + dt_node_free(dnp->dn_expr); + dnp->dn_expr = NULL; + } + break; + + case DT_NODE_AGG: + if (dnp->dn_aggfun != NULL) { + dt_node_free(dnp->dn_aggfun); + dnp->dn_aggfun = NULL; + } + dt_node_list_free(&dnp->dn_aggtup); + break; + + case DT_NODE_PDESC: + free(dnp->dn_spec); + dnp->dn_spec = NULL; + free(dnp->dn_desc); + dnp->dn_desc = NULL; + break; + + case DT_NODE_CLAUSE: + if (dnp->dn_pred != NULL) + dt_node_free(dnp->dn_pred); + if (dnp->dn_locals != NULL) + dt_idhash_destroy(dnp->dn_locals); + dt_node_list_free(&dnp->dn_pdescs); + dt_node_list_free(&dnp->dn_acts); + break; + + case DT_NODE_MEMBER: + free(dnp->dn_membname); + dnp->dn_membname = NULL; + if (dnp->dn_membexpr != NULL) { + dt_node_free(dnp->dn_membexpr); + dnp->dn_membexpr = NULL; + } + break; + + case DT_NODE_PROVIDER: + dt_node_list_free(&dnp->dn_probes); + free(dnp->dn_provname); + dnp->dn_provname = NULL; + break; + + case DT_NODE_PROG: + dt_node_list_free(&dnp->dn_list); + break; + } +} + +void +dt_node_attr_assign(dt_node_t *dnp, dtrace_attribute_t attr) +{ + if ((yypcb->pcb_cflags & DTRACE_C_EATTR) && + (dt_attr_cmp(attr, yypcb->pcb_amin) < 0)) { + char a[DTRACE_ATTR2STR_MAX]; + char s[BUFSIZ]; + + dnerror(dnp, D_ATTR_MIN, "attributes for %s (%s) are less than " + "predefined minimum\n", dt_node_name(dnp, s, sizeof (s)), + dtrace_attr2str(attr, a, sizeof (a))); + } + + dnp->dn_attr = attr; +} + +void +dt_node_type_assign(dt_node_t *dnp, ctf_file_t *fp, ctf_id_t type, + boolean_t user) +{ + ctf_id_t base = ctf_type_resolve(fp, type); + uint_t kind = ctf_type_kind(fp, base); + ctf_encoding_t e; + + dnp->dn_flags &= + ~(DT_NF_SIGNED | DT_NF_REF | DT_NF_BITFIELD | DT_NF_USERLAND); + + if (kind == 
CTF_K_INTEGER && ctf_type_encoding(fp, base, &e) == 0) { + size_t size = e.cte_bits / NBBY; + + if (size > 8 || (e.cte_bits % NBBY) != 0 || (size & (size - 1))) + dnp->dn_flags |= DT_NF_BITFIELD; + + if (e.cte_format & CTF_INT_SIGNED) + dnp->dn_flags |= DT_NF_SIGNED; + } + + if (kind == CTF_K_FLOAT && ctf_type_encoding(fp, base, &e) == 0) { + if (e.cte_bits / NBBY > sizeof (uint64_t)) + dnp->dn_flags |= DT_NF_REF; + } + + if (kind == CTF_K_STRUCT || kind == CTF_K_UNION || + kind == CTF_K_FORWARD || + kind == CTF_K_ARRAY || kind == CTF_K_FUNCTION) + dnp->dn_flags |= DT_NF_REF; + else if (yypcb != NULL && fp == DT_DYN_CTFP(yypcb->pcb_hdl) && + type == DT_DYN_TYPE(yypcb->pcb_hdl)) + dnp->dn_flags |= DT_NF_REF; + + if (user) + dnp->dn_flags |= DT_NF_USERLAND; + + dnp->dn_flags |= DT_NF_COOKED; + dnp->dn_ctfp = fp; + dnp->dn_type = type; +} + +void +dt_node_type_propagate(const dt_node_t *src, dt_node_t *dst) +{ + assert(src->dn_flags & DT_NF_COOKED); + dst->dn_flags = src->dn_flags & ~DT_NF_LVALUE; + dst->dn_ctfp = src->dn_ctfp; + dst->dn_type = src->dn_type; +} + +const char * +dt_node_type_name(const dt_node_t *dnp, char *buf, size_t len) +{ + if (dt_node_is_dynamic(dnp) && dnp->dn_ident != NULL) { + (void) snprintf(buf, len, "%s", + dt_idkind_name(dt_ident_resolve(dnp->dn_ident)->di_kind)); + return (buf); + } + + if (dnp->dn_flags & DT_NF_USERLAND) { + size_t n = snprintf(buf, len, "userland "); + len = len > n ? 
len - n : 0; + (void) dt_type_name(dnp->dn_ctfp, dnp->dn_type, buf + n, len); + return (buf); + } + + return (dt_type_name(dnp->dn_ctfp, dnp->dn_type, buf, len)); +} + +size_t +dt_node_type_size(const dt_node_t *dnp) +{ + ctf_id_t base; + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + if (dnp->dn_kind == DT_NODE_STRING) + return (strlen(dnp->dn_string) + 1); + + if (dt_node_is_dynamic(dnp) && dnp->dn_ident != NULL) + return (dt_ident_size(dnp->dn_ident)); + + base = ctf_type_resolve(dnp->dn_ctfp, dnp->dn_type); + + if (ctf_type_kind(dnp->dn_ctfp, base) == CTF_K_FORWARD) + return (0); + + /* + * Here we have a 32-bit user pointer that is being used with a 64-bit + * kernel. When we're using it and its tagged as a userland reference -- + * then we need to keep it as a 32-bit pointer. However, if we are + * referring to it as a kernel address, eg. being used after a copyin() + * then we need to make sure that we actually return the kernel's size + * of a pointer, 8 bytes. + */ + if (ctf_type_kind(dnp->dn_ctfp, base) == CTF_K_POINTER && + ctf_getmodel(dnp->dn_ctfp) == CTF_MODEL_ILP32 && + !(dnp->dn_flags & DT_NF_USERLAND) && + dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) + return (8); + + return (ctf_type_size(dnp->dn_ctfp, dnp->dn_type)); +} + +/* + * Determine if the specified parse tree node references an identifier of the + * specified kind, and if so return a pointer to it; otherwise return NULL. + * This function resolves the identifier itself, following through any inlines. + */ +dt_ident_t * +dt_node_resolve(const dt_node_t *dnp, uint_t idkind) +{ + dt_ident_t *idp; + + switch (dnp->dn_kind) { + case DT_NODE_VAR: + case DT_NODE_SYM: + case DT_NODE_FUNC: + case DT_NODE_AGG: + case DT_NODE_INLINE: + case DT_NODE_PROBE: + idp = dt_ident_resolve(dnp->dn_ident); + return (idp->di_kind == idkind ? idp : NULL); + } + + if (dt_node_is_dynamic(dnp)) { + idp = dt_ident_resolve(dnp->dn_ident); + return (idp->di_kind == idkind ? 
idp : NULL); + } + + return (NULL); +} + +size_t +dt_node_sizeof(const dt_node_t *dnp) +{ + dtrace_syminfo_t *sip; + GElf_Sym sym; + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + /* + * The size of the node as used for the sizeof() operator depends on + * the kind of the node. If the node is a SYM, the size is obtained + * from the symbol table; if it is not a SYM, the size is determined + * from the node's type. This is slightly different from C's sizeof() + * operator in that (for example) when applied to a function, sizeof() + * will evaluate to the length of the function rather than the size of + * the function type. + */ + if (dnp->dn_kind != DT_NODE_SYM) + return (dt_node_type_size(dnp)); + + sip = dnp->dn_ident->di_data; + + if (dtrace_lookup_by_name(dtp, sip->dts_object, + sip->dts_name, &sym, NULL) == -1) + return (0); + + return (sym.st_size); +} + +int +dt_node_is_integer(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_id_t type; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + type = ctf_type_resolve(fp, dnp->dn_type); + kind = ctf_type_kind(fp, type); + + if (kind == CTF_K_INTEGER && + ctf_type_encoding(fp, type, &e) == 0 && IS_VOID(e)) + return (0); /* void integer */ + + return (kind == CTF_K_INTEGER || kind == CTF_K_ENUM); +} + +int +dt_node_is_float(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_id_t type; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + type = ctf_type_resolve(fp, dnp->dn_type); + kind = ctf_type_kind(fp, type); + + return (kind == CTF_K_FLOAT && + ctf_type_encoding(dnp->dn_ctfp, type, &e) == 0 && ( + e.cte_format == CTF_FP_SINGLE || e.cte_format == CTF_FP_DOUBLE || + e.cte_format == CTF_FP_LDOUBLE)); +} + +int +dt_node_is_scalar(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_id_t type; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + type = ctf_type_resolve(fp, dnp->dn_type); + kind = 
ctf_type_kind(fp, type); + + if (kind == CTF_K_INTEGER && + ctf_type_encoding(fp, type, &e) == 0 && IS_VOID(e)) + return (0); /* void cannot be used as a scalar */ + + return (kind == CTF_K_INTEGER || kind == CTF_K_ENUM || + kind == CTF_K_POINTER); +} + +int +dt_node_is_arith(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_id_t type; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + type = ctf_type_resolve(fp, dnp->dn_type); + kind = ctf_type_kind(fp, type); + + if (kind == CTF_K_INTEGER) + return (ctf_type_encoding(fp, type, &e) == 0 && !IS_VOID(e)); + else + return (kind == CTF_K_ENUM); +} + +int +dt_node_is_vfptr(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_id_t type; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + type = ctf_type_resolve(fp, dnp->dn_type); + if (ctf_type_kind(fp, type) != CTF_K_POINTER) + return (0); /* type is not a pointer */ + + type = ctf_type_resolve(fp, ctf_type_reference(fp, type)); + kind = ctf_type_kind(fp, type); + + return (kind == CTF_K_FUNCTION || (kind == CTF_K_INTEGER && + ctf_type_encoding(fp, type, &e) == 0 && IS_VOID(e))); +} + +int +dt_node_is_dynamic(const dt_node_t *dnp) +{ + if (dnp->dn_kind == DT_NODE_VAR && + (dnp->dn_ident->di_flags & DT_IDFLG_INLINE)) { + const dt_idnode_t *inp = dnp->dn_ident->di_iarg; + return (inp->din_root ? 
dt_node_is_dynamic(inp->din_root) : 0); + } + + return (dnp->dn_ctfp == DT_DYN_CTFP(yypcb->pcb_hdl) && + dnp->dn_type == DT_DYN_TYPE(yypcb->pcb_hdl)); +} + +int +dt_node_is_string(const dt_node_t *dnp) +{ + return (dnp->dn_ctfp == DT_STR_CTFP(yypcb->pcb_hdl) && + dnp->dn_type == DT_STR_TYPE(yypcb->pcb_hdl)); +} + +int +dt_node_is_stack(const dt_node_t *dnp) +{ + return (dnp->dn_ctfp == DT_STACK_CTFP(yypcb->pcb_hdl) && + dnp->dn_type == DT_STACK_TYPE(yypcb->pcb_hdl)); +} + +int +dt_node_is_symaddr(const dt_node_t *dnp) +{ + return (dnp->dn_ctfp == DT_SYMADDR_CTFP(yypcb->pcb_hdl) && + dnp->dn_type == DT_SYMADDR_TYPE(yypcb->pcb_hdl)); +} + +int +dt_node_is_usymaddr(const dt_node_t *dnp) +{ + return (dnp->dn_ctfp == DT_USYMADDR_CTFP(yypcb->pcb_hdl) && + dnp->dn_type == DT_USYMADDR_TYPE(yypcb->pcb_hdl)); +} + +int +dt_node_is_strcompat(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_arinfo_t r; + ctf_id_t base; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + base = ctf_type_resolve(fp, dnp->dn_type); + kind = ctf_type_kind(fp, base); + + if (kind == CTF_K_POINTER && + (base = ctf_type_reference(fp, base)) != CTF_ERR && + (base = ctf_type_resolve(fp, base)) != CTF_ERR && + ctf_type_encoding(fp, base, &e) == 0 && IS_CHAR(e)) + return (1); /* promote char pointer to string */ + + if (kind == CTF_K_ARRAY && ctf_array_info(fp, base, &r) == 0 && + (base = ctf_type_resolve(fp, r.ctr_contents)) != CTF_ERR && + ctf_type_encoding(fp, base, &e) == 0 && IS_CHAR(e)) + return (1); /* promote char array to string */ + + return (0); +} + +int +dt_node_is_pointer(const dt_node_t *dnp) +{ + ctf_file_t *fp = dnp->dn_ctfp; + uint_t kind; + + assert(dnp->dn_flags & DT_NF_COOKED); + + if (dt_node_is_string(dnp)) + return (0); /* string are pass-by-ref but act like structs */ + + kind = ctf_type_kind(fp, ctf_type_resolve(fp, dnp->dn_type)); + return (kind == CTF_K_POINTER || kind == CTF_K_ARRAY); +} + +int +dt_node_is_void(const dt_node_t *dnp) 
+{ + ctf_file_t *fp = dnp->dn_ctfp; + ctf_encoding_t e; + ctf_id_t type; + + if (dt_node_is_dynamic(dnp)) + return (0); /* <DYN> is an alias for void but not the same */ + + if (dt_node_is_stack(dnp)) + return (0); + + if (dt_node_is_symaddr(dnp) || dt_node_is_usymaddr(dnp)) + return (0); + + type = ctf_type_resolve(fp, dnp->dn_type); + + return (ctf_type_kind(fp, type) == CTF_K_INTEGER && + ctf_type_encoding(fp, type, &e) == 0 && IS_VOID(e)); +} + +int +dt_node_is_ptrcompat(const dt_node_t *lp, const dt_node_t *rp, + ctf_file_t **fpp, ctf_id_t *tp) +{ + ctf_file_t *lfp = lp->dn_ctfp; + ctf_file_t *rfp = rp->dn_ctfp; + + ctf_id_t lbase = CTF_ERR, rbase = CTF_ERR; + ctf_id_t lref = CTF_ERR, rref = CTF_ERR; + + int lp_is_void, rp_is_void, lp_is_int, rp_is_int, compat; + uint_t lkind, rkind; + ctf_encoding_t e; + ctf_arinfo_t r; + + assert(lp->dn_flags & DT_NF_COOKED); + assert(rp->dn_flags & DT_NF_COOKED); + + if (dt_node_is_dynamic(lp) || dt_node_is_dynamic(rp)) + return (0); /* fail if either node is a dynamic variable */ + + lp_is_int = dt_node_is_integer(lp); + rp_is_int = dt_node_is_integer(rp); + + if (lp_is_int && rp_is_int) + return (0); /* fail if both nodes are integers */ + + if (lp_is_int && (lp->dn_kind != DT_NODE_INT || lp->dn_value != 0)) + return (0); /* fail if lp is an integer that isn't 0 constant */ + + if (rp_is_int && (rp->dn_kind != DT_NODE_INT || rp->dn_value != 0)) + return (0); /* fail if rp is an integer that isn't 0 constant */ + + if ((lp_is_int == 0 && rp_is_int == 0) && ( + (lp->dn_flags & DT_NF_USERLAND) ^ (rp->dn_flags & DT_NF_USERLAND))) + return (0); /* fail if only one pointer is a userland address */ + + /* + * Resolve the left-hand and right-hand types to their base type, and + * then resolve the referenced type as well (assuming the base type + * is CTF_K_POINTER or CTF_K_ARRAY). Otherwise [lr]ref = CTF_ERR. 
+ */ + if (!lp_is_int) { + lbase = ctf_type_resolve(lfp, lp->dn_type); + lkind = ctf_type_kind(lfp, lbase); + + if (lkind == CTF_K_POINTER) { + lref = ctf_type_resolve(lfp, + ctf_type_reference(lfp, lbase)); + } else if (lkind == CTF_K_ARRAY && + ctf_array_info(lfp, lbase, &r) == 0) { + lref = ctf_type_resolve(lfp, r.ctr_contents); + } + } + + if (!rp_is_int) { + rbase = ctf_type_resolve(rfp, rp->dn_type); + rkind = ctf_type_kind(rfp, rbase); + + if (rkind == CTF_K_POINTER) { + rref = ctf_type_resolve(rfp, + ctf_type_reference(rfp, rbase)); + } else if (rkind == CTF_K_ARRAY && + ctf_array_info(rfp, rbase, &r) == 0) { + rref = ctf_type_resolve(rfp, r.ctr_contents); + } + } + + /* + * We know that one or the other type may still be a zero-valued + * integer constant. To simplify the code below, set the integer + * type variables equal to the non-integer types and proceed. + */ + if (lp_is_int) { + lbase = rbase; + lkind = rkind; + lref = rref; + lfp = rfp; + } else if (rp_is_int) { + rbase = lbase; + rkind = lkind; + rref = lref; + rfp = lfp; + } + + lp_is_void = ctf_type_encoding(lfp, lref, &e) == 0 && IS_VOID(e); + rp_is_void = ctf_type_encoding(rfp, rref, &e) == 0 && IS_VOID(e); + + /* + * The types are compatible if both are pointers to the same type, or + * if either pointer is a void pointer. If they are compatible, set + * tp to point to the more specific pointer type and return it. + */ + compat = (lkind == CTF_K_POINTER || lkind == CTF_K_ARRAY) && + (rkind == CTF_K_POINTER || rkind == CTF_K_ARRAY) && + (lp_is_void || rp_is_void || ctf_type_compat(lfp, lref, rfp, rref)); + + if (compat) { + if (fpp != NULL) + *fpp = rp_is_void ? lfp : rfp; + if (tp != NULL) + *tp = rp_is_void ? lbase : rbase; + } + + return (compat); +} + +/* + * The rules for checking argument types against parameter types are described + * in the ANSI-C spec (see K&R[A7.3.2] and K&R[A7.17]). We use the same rule + * set to determine whether associative array arguments match the prototype. 
+ */ +int +dt_node_is_argcompat(const dt_node_t *lp, const dt_node_t *rp) +{ + ctf_file_t *lfp = lp->dn_ctfp; + ctf_file_t *rfp = rp->dn_ctfp; + + assert(lp->dn_flags & DT_NF_COOKED); + assert(rp->dn_flags & DT_NF_COOKED); + + if (dt_node_is_integer(lp) && dt_node_is_integer(rp)) + return (1); /* integer types are compatible */ + + if (dt_node_is_strcompat(lp) && dt_node_is_strcompat(rp)) + return (1); /* string types are compatible */ + + if (dt_node_is_stack(lp) && dt_node_is_stack(rp)) + return (1); /* stack types are compatible */ + + if (dt_node_is_symaddr(lp) && dt_node_is_symaddr(rp)) + return (1); /* symaddr types are compatible */ + + if (dt_node_is_usymaddr(lp) && dt_node_is_usymaddr(rp)) + return (1); /* usymaddr types are compatible */ + + switch (ctf_type_kind(lfp, ctf_type_resolve(lfp, lp->dn_type))) { + case CTF_K_FUNCTION: + case CTF_K_STRUCT: + case CTF_K_UNION: + return (ctf_type_compat(lfp, lp->dn_type, rfp, rp->dn_type)); + default: + return (dt_node_is_ptrcompat(lp, rp, NULL, NULL)); + } +} + +/* + * We provide dt_node_is_posconst() as a convenience routine for callers who + * wish to verify that an argument is a positive non-zero integer constant. + */ +int +dt_node_is_posconst(const dt_node_t *dnp) +{ + return (dnp->dn_kind == DT_NODE_INT && dnp->dn_value != 0 && ( + (dnp->dn_flags & DT_NF_SIGNED) == 0 || (int64_t)dnp->dn_value > 0)); +} + +int +dt_node_is_actfunc(const dt_node_t *dnp) +{ + return (dnp->dn_kind == DT_NODE_FUNC && + dnp->dn_ident->di_kind == DT_IDENT_ACTFUNC); +} + +/* + * The original rules for integer constant typing are described in K&R[A2.5.1]. + * However, since we support long long, we instead use the rules from ISO C99 + * clause 6.4.4.1 since that is where long longs are formally described. The + * rules require us to know whether the constant was specified in decimal or + * in octal or hex, which we do by looking at our lexer's 'yyintdecimal' flag. 
+ * The type of an integer constant is the first of the corresponding list in + * which its value can be represented: + * + * unsuffixed decimal: int, long, long long + * unsuffixed oct/hex: int, unsigned int, long, unsigned long, + * long long, unsigned long long + * suffix [uU]: unsigned int, unsigned long, unsigned long long + * suffix [lL] decimal: long, long long + * suffix [lL] oct/hex: long, unsigned long, long long, unsigned long long + * suffix [uU][Ll]: unsigned long, unsigned long long + * suffix ll/LL decimal: long long + * suffix ll/LL oct/hex: long long, unsigned long long + * suffix [uU][ll/LL]: unsigned long long + * + * Given that our lexer has already validated the suffixes by regexp matching, + * there is an obvious way to concisely encode these rules: construct an array + * of the types in the order int, unsigned int, long, unsigned long, long long, + * unsigned long long. Compute an integer array starting index based on the + * suffix (e.g. none = 0, u = 1, ull = 5), and compute an increment based on + * the specifier (dec/oct/hex) and suffix (u). Then iterate from the starting + * index to the end, advancing using the increment, and searching until we + * find a limit that matches or we run out of choices (overflow). To make it + * even faster, we precompute the table of type information in dtrace_open(). 
+ */ +dt_node_t * +dt_node_int(uintmax_t value) +{ + dt_node_t *dnp = dt_node_alloc(DT_NODE_INT); + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + int n = (yyintdecimal | (yyintsuffix[0] == 'u')) + 1; + int i = 0; + + const char *p; + char c; + + dnp->dn_op = DT_TOK_INT; + dnp->dn_value = value; + + for (p = yyintsuffix; (c = *p) != '\0'; p++) { + if (c == 'U' || c == 'u') + i += 1; + else if (c == 'L' || c == 'l') + i += 2; + } + + for (; i < sizeof (dtp->dt_ints) / sizeof (dtp->dt_ints[0]); i += n) { + if (value <= dtp->dt_ints[i].did_limit) { + dt_node_type_assign(dnp, + dtp->dt_ints[i].did_ctfp, + dtp->dt_ints[i].did_type, B_FALSE); + + /* + * If a prefix character is present in macro text, add + * in the corresponding operator node (see dt_lex.l). + */ + switch (yyintprefix) { + case '+': + return (dt_node_op1(DT_TOK_IPOS, dnp)); + case '-': + return (dt_node_op1(DT_TOK_INEG, dnp)); + default: + return (dnp); + } + } + } + + xyerror(D_INT_OFLOW, "integer constant 0x%llx cannot be represented " + "in any built-in integral type\n", (u_longlong_t)value); + /*NOTREACHED*/ + return (NULL); /* keep gcc happy */ +} + +dt_node_t * +dt_node_string(char *string) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_node_t *dnp; + + if (string == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dnp = dt_node_alloc(DT_NODE_STRING); + dnp->dn_op = DT_TOK_STRING; + dnp->dn_string = string; + dt_node_type_assign(dnp, DT_STR_CTFP(dtp), DT_STR_TYPE(dtp), B_FALSE); + + return (dnp); +} + +dt_node_t * +dt_node_ident(char *name) +{ + dt_ident_t *idp; + dt_node_t *dnp; + + if (name == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * If the identifier is an inlined integer constant, then create an INT + * node that is a clone of the inline parse tree node and return that + * immediately, allowing this inline to be used in parsing contexts + * that require constant expressions (e.g. scalar array sizes). 
+ */ + if ((idp = dt_idstack_lookup(&yypcb->pcb_globals, name)) != NULL && + (idp->di_flags & DT_IDFLG_INLINE)) { + dt_idnode_t *inp = idp->di_iarg; + + if (inp->din_root != NULL && + inp->din_root->dn_kind == DT_NODE_INT) { + free(name); + + dnp = dt_node_alloc(DT_NODE_INT); + dnp->dn_op = DT_TOK_INT; + dnp->dn_value = inp->din_root->dn_value; + dt_node_type_propagate(inp->din_root, dnp); + + return (dnp); + } + } + + dnp = dt_node_alloc(DT_NODE_IDENT); + dnp->dn_op = name[0] == '@' ? DT_TOK_AGG : DT_TOK_IDENT; + dnp->dn_string = name; + + return (dnp); +} + +/* + * Create an empty node of type corresponding to the given declaration. + * Explicit references to user types (C or D) are assigned the default + * stability; references to other types are _dtrace_typattr (Private). + */ +dt_node_t * +dt_node_type(dt_decl_t *ddp) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dtrace_typeinfo_t dtt; + dt_node_t *dnp; + char *name = NULL; + int err; + + /* + * If 'ddp' is NULL, we get a decl by popping the decl stack. This + * form of dt_node_type() is used by parameter rules in dt_grammar.y. + */ + if (ddp == NULL) + ddp = dt_decl_pop_param(&name); + + err = dt_decl_type(ddp, &dtt); + dt_decl_free(ddp); + + if (err != 0) { + free(name); + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + } + + dnp = dt_node_alloc(DT_NODE_TYPE); + dnp->dn_op = DT_TOK_IDENT; + dnp->dn_string = name; + + dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, dtt.dtt_flags); + + if (dtt.dtt_ctfp == dtp->dt_cdefs->dm_ctfp || + dtt.dtt_ctfp == dtp->dt_ddefs->dm_ctfp) + dt_node_attr_assign(dnp, _dtrace_defattr); + else + dt_node_attr_assign(dnp, _dtrace_typattr); + + return (dnp); +} + +/* + * Create a type node corresponding to a varargs (...) parameter by just + * assigning it type CTF_ERR. The decl processing code will handle this. 
+ */ +dt_node_t * +dt_node_vatype(void) +{ + dt_node_t *dnp = dt_node_alloc(DT_NODE_TYPE); + + dnp->dn_op = DT_TOK_IDENT; + dnp->dn_ctfp = yypcb->pcb_hdl->dt_cdefs->dm_ctfp; + dnp->dn_type = CTF_ERR; + dnp->dn_attr = _dtrace_defattr; + + return (dnp); +} + +/* + * Instantiate a decl using the contents of the current declaration stack. As + * we do not currently permit decls to be initialized, this function currently + * returns NULL and no parse node is created. When this function is called, + * the topmost scope's ds_ident pointer will be set to NULL (indicating no + * init_declarator rule was matched) or will point to the identifier to use. + */ +dt_node_t * +dt_node_decl(void) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_dclass_t class = dsp->ds_class; + dt_decl_t *ddp = dt_decl_top(); + + dt_module_t *dmp; + dtrace_typeinfo_t dtt; + ctf_id_t type; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + if (dt_decl_type(ddp, &dtt) != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + + /* + * If we have no declaration identifier, then this is either a spurious + * declaration of an intrinsic type (e.g. "extern int;") or declaration + * or redeclaration of a struct, union, or enum type or tag. + */ + if (dsp->ds_ident == NULL) { + if (ddp->dd_kind != CTF_K_STRUCT && + ddp->dd_kind != CTF_K_UNION && ddp->dd_kind != CTF_K_ENUM) + xyerror(D_DECL_USELESS, "useless declaration\n"); + + dt_dprintf("type %s added as id %ld\n", dt_type_name( + ddp->dd_ctfp, ddp->dd_type, n1, sizeof (n1)), ddp->dd_type); + + return (NULL); + } + + if (strchr(dsp->ds_ident, '`') != NULL) { + xyerror(D_DECL_SCOPE, "D scoping operator may not be used in " + "a declaration name (%s)\n", dsp->ds_ident); + } + + /* + * If we are nested inside of a C include file, add the declaration to + * the C definition module; otherwise use the D definition module. 
+ */ + if (yypcb->pcb_idepth != 0) + dmp = dtp->dt_cdefs; + else + dmp = dtp->dt_ddefs; + + /* + * If we see a global or static declaration of a function prototype, + * treat this as equivalent to a D extern declaration. + */ + if (ctf_type_kind(dtt.dtt_ctfp, dtt.dtt_type) == CTF_K_FUNCTION && + (class == DT_DC_DEFAULT || class == DT_DC_STATIC)) + class = DT_DC_EXTERN; + + switch (class) { + case DT_DC_AUTO: + case DT_DC_REGISTER: + case DT_DC_STATIC: + xyerror(D_DECL_BADCLASS, "specified storage class not " + "appropriate in D\n"); + /*NOTREACHED*/ + + case DT_DC_EXTERN: { + dtrace_typeinfo_t ott; + dtrace_syminfo_t dts; + GElf_Sym sym; + + int exists = dtrace_lookup_by_name(dtp, + dmp->dm_name, dsp->ds_ident, &sym, &dts) == 0; + + if (exists && (dtrace_symbol_type(dtp, &sym, &dts, &ott) != 0 || + ctf_type_cmp(dtt.dtt_ctfp, dtt.dtt_type, + ott.dtt_ctfp, ott.dtt_type) != 0)) { + xyerror(D_DECL_IDRED, "identifier redeclared: %s`%s\n" + "\t current: %s\n\tprevious: %s\n", + dmp->dm_name, dsp->ds_ident, + dt_type_name(dtt.dtt_ctfp, dtt.dtt_type, + n1, sizeof (n1)), + dt_type_name(ott.dtt_ctfp, ott.dtt_type, + n2, sizeof (n2))); + } else if (!exists && dt_module_extern(dtp, dmp, + dsp->ds_ident, &dtt) == NULL) { + xyerror(D_UNKNOWN, + "failed to extern %s: %s\n", dsp->ds_ident, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + } else { + dt_dprintf("extern %s`%s type=<%s>\n", + dmp->dm_name, dsp->ds_ident, + dt_type_name(dtt.dtt_ctfp, dtt.dtt_type, + n1, sizeof (n1))); + } + break; + } + + case DT_DC_TYPEDEF: + if (dt_idstack_lookup(&yypcb->pcb_globals, dsp->ds_ident)) { + xyerror(D_DECL_IDRED, "global variable identifier " + "redeclared: %s\n", dsp->ds_ident); + } + + if (ctf_lookup_by_name(dmp->dm_ctfp, + dsp->ds_ident) != CTF_ERR) { + xyerror(D_DECL_IDRED, + "typedef redeclared: %s\n", dsp->ds_ident); + } + + /* + * If the source type for the typedef is not defined in the + * target container or its parent, copy the type to the target + * container and reset dtt_ctfp and 
dtt_type to the copy. + */ + if (dtt.dtt_ctfp != dmp->dm_ctfp && + dtt.dtt_ctfp != ctf_parent_file(dmp->dm_ctfp)) { + + dtt.dtt_type = ctf_add_type(dmp->dm_ctfp, + dtt.dtt_ctfp, dtt.dtt_type); + dtt.dtt_ctfp = dmp->dm_ctfp; + + if (dtt.dtt_type == CTF_ERR || + ctf_update(dtt.dtt_ctfp) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to copy typedef %s " + "source type: %s\n", dsp->ds_ident, + ctf_errmsg(ctf_errno(dtt.dtt_ctfp))); + } + } + + type = ctf_add_typedef(dmp->dm_ctfp, + CTF_ADD_ROOT, dsp->ds_ident, dtt.dtt_type); + + if (type == CTF_ERR || ctf_update(dmp->dm_ctfp) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to typedef %s: %s\n", + dsp->ds_ident, ctf_errmsg(ctf_errno(dmp->dm_ctfp))); + } + + dt_dprintf("typedef %s added as id %ld\n", dsp->ds_ident, type); + break; + + default: { + ctf_encoding_t cte; + dt_idhash_t *dhp; + dt_ident_t *idp; + dt_node_t idn; + int assc, idkind; + uint_t id, kind; + ushort_t idflags; + + switch (class) { + case DT_DC_THIS: + dhp = yypcb->pcb_locals; + idflags = DT_IDFLG_LOCAL; + idp = dt_idhash_lookup(dhp, dsp->ds_ident); + break; + case DT_DC_SELF: + dhp = dtp->dt_tls; + idflags = DT_IDFLG_TLS; + idp = dt_idhash_lookup(dhp, dsp->ds_ident); + break; + default: + dhp = dtp->dt_globals; + idflags = 0; + idp = dt_idstack_lookup( + &yypcb->pcb_globals, dsp->ds_ident); + break; + } + + if (ddp->dd_kind == CTF_K_ARRAY && ddp->dd_node == NULL) { + xyerror(D_DECL_ARRNULL, + "array declaration requires array dimension or " + "tuple signature: %s\n", dsp->ds_ident); + } + + if (idp != NULL && idp->di_gen == 0) { + xyerror(D_DECL_IDRED, "built-in identifier " + "redeclared: %s\n", idp->di_name); + } + + if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_CDEFS, + dsp->ds_ident, NULL) == 0 || + dtrace_lookup_by_type(dtp, DTRACE_OBJ_DDEFS, + dsp->ds_ident, NULL) == 0) { + xyerror(D_DECL_IDRED, "typedef identifier " + "redeclared: %s\n", dsp->ds_ident); + } + + /* + * Cache some attributes of the decl to make the rest of this + * code simpler: if the decl 
is an array which is subscripted + * by a type rather than an integer, then it's an associative + * array (assc). We then expect to match either DT_IDENT_ARRAY + * for associative arrays or DT_IDENT_SCALAR for anything else. + */ + assc = ddp->dd_kind == CTF_K_ARRAY && + ddp->dd_node->dn_kind == DT_NODE_TYPE; + + idkind = assc ? DT_IDENT_ARRAY : DT_IDENT_SCALAR; + + /* + * Create a fake dt_node_t on the stack so we can determine the + * type of any matching identifier by assigning to this node. + * If the pre-existing ident has its di_type set, propagate + * the type by hand so as not to trigger a prototype check for + * arrays (yet); otherwise we use dt_ident_cook() on the ident + * to ensure it is fully initialized before looking at it. + */ + bzero(&idn, sizeof (dt_node_t)); + + if (idp != NULL && idp->di_type != CTF_ERR) + dt_node_type_assign(&idn, idp->di_ctfp, idp->di_type, + B_FALSE); + else if (idp != NULL) + (void) dt_ident_cook(&idn, idp, NULL); + + if (assc) { + if (class == DT_DC_THIS) { + xyerror(D_DECL_LOCASSC, "associative arrays " + "may not be declared as local variables:" + " %s\n", dsp->ds_ident); + } + + if (dt_decl_type(ddp->dd_next, &dtt) != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + } + + if (idp != NULL && (idp->di_kind != idkind || + ctf_type_cmp(dtt.dtt_ctfp, dtt.dtt_type, + idn.dn_ctfp, idn.dn_type) != 0)) { + xyerror(D_DECL_IDRED, "identifier redeclared: %s\n" + "\t current: %s %s\n\tprevious: %s %s\n", + dsp->ds_ident, dt_idkind_name(idkind), + dt_type_name(dtt.dtt_ctfp, + dtt.dtt_type, n1, sizeof (n1)), + dt_idkind_name(idp->di_kind), + dt_node_type_name(&idn, n2, sizeof (n2))); + + } else if (idp != NULL && assc) { + const dt_idsig_t *isp = idp->di_data; + dt_node_t *dnp = ddp->dd_node; + int argc = 0; + + for (; dnp != NULL; dnp = dnp->dn_list, argc++) { + const dt_node_t *pnp = &isp->dis_args[argc]; + + if (argc >= isp->dis_argc) + continue; /* tuple length mismatch */ + + if (ctf_type_cmp(dnp->dn_ctfp, dnp->dn_type, + 
pnp->dn_ctfp, pnp->dn_type) == 0) + continue; + + xyerror(D_DECL_IDRED, + "identifier redeclared: %s\n" + "\t current: %s, key #%d of type %s\n" + "\tprevious: %s, key #%d of type %s\n", + dsp->ds_ident, + dt_idkind_name(idkind), argc + 1, + dt_node_type_name(dnp, n1, sizeof (n1)), + dt_idkind_name(idp->di_kind), argc + 1, + dt_node_type_name(pnp, n2, sizeof (n2))); + } + + if (isp->dis_argc != argc) { + xyerror(D_DECL_IDRED, + "identifier redeclared: %s\n" + "\t current: %s of %s, tuple length %d\n" + "\tprevious: %s of %s, tuple length %d\n", + dsp->ds_ident, dt_idkind_name(idkind), + dt_type_name(dtt.dtt_ctfp, dtt.dtt_type, + n1, sizeof (n1)), argc, + dt_idkind_name(idp->di_kind), + dt_node_type_name(&idn, n2, sizeof (n2)), + isp->dis_argc); + } + + } else if (idp == NULL) { + type = ctf_type_resolve(dtt.dtt_ctfp, dtt.dtt_type); + kind = ctf_type_kind(dtt.dtt_ctfp, type); + + switch (kind) { + case CTF_K_INTEGER: + if (ctf_type_encoding(dtt.dtt_ctfp, type, + &cte) == 0 && IS_VOID(cte)) { + xyerror(D_DECL_VOIDOBJ, "cannot have " + "void object: %s\n", dsp->ds_ident); + } + break; + case CTF_K_STRUCT: + case CTF_K_UNION: + if (ctf_type_size(dtt.dtt_ctfp, type) != 0) + break; /* proceed to declaring */ + /*FALLTHRU*/ + case CTF_K_FORWARD: + xyerror(D_DECL_INCOMPLETE, + "incomplete struct/union/enum %s: %s\n", + dt_type_name(dtt.dtt_ctfp, dtt.dtt_type, + n1, sizeof (n1)), dsp->ds_ident); + /*NOTREACHED*/ + } + + if (dt_idhash_nextid(dhp, &id) == -1) { + xyerror(D_ID_OFLOW, "cannot create %s: limit " + "on number of %s variables exceeded\n", + dsp->ds_ident, dt_idhash_name(dhp)); + } + + dt_dprintf("declare %s %s variable %s, id=%u\n", + dt_idhash_name(dhp), dt_idkind_name(idkind), + dsp->ds_ident, id); + + idp = dt_idhash_insert(dhp, dsp->ds_ident, idkind, + idflags | DT_IDFLG_WRITE | DT_IDFLG_DECL, id, + _dtrace_defattr, 0, assc ? 
&dt_idops_assc : + &dt_idops_thaw, NULL, dtp->dt_gen); + + if (idp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dt_ident_type_assign(idp, dtt.dtt_ctfp, dtt.dtt_type); + + /* + * If we are declaring an associative array, use our + * fake parse node to cook the new assoc identifier. + * This will force the ident code to instantiate the + * array type signature corresponding to the list of + * types pointed to by ddp->dd_node. We also reset + * the identifier's attributes based upon the result. + */ + if (assc) { + idp->di_attr = + dt_ident_cook(&idn, idp, &ddp->dd_node); + } + } + } + + } /* end of switch */ + + free(dsp->ds_ident); + dsp->ds_ident = NULL; + + return (NULL); +} + +dt_node_t * +dt_node_func(dt_node_t *dnp, dt_node_t *args) +{ + dt_ident_t *idp; + + if (dnp->dn_kind != DT_NODE_IDENT) { + xyerror(D_FUNC_IDENT, + "function designator is not of function type\n"); + } + + idp = dt_idstack_lookup(&yypcb->pcb_globals, dnp->dn_string); + + if (idp == NULL) { + xyerror(D_FUNC_UNDEF, + "undefined function name: %s\n", dnp->dn_string); + } + + if (idp->di_kind != DT_IDENT_FUNC && + idp->di_kind != DT_IDENT_AGGFUNC && + idp->di_kind != DT_IDENT_ACTFUNC) { + xyerror(D_FUNC_IDKIND, "%s '%s' may not be referenced as a " + "function\n", dt_idkind_name(idp->di_kind), idp->di_name); + } + + free(dnp->dn_string); + dnp->dn_string = NULL; + + dnp->dn_kind = DT_NODE_FUNC; + dnp->dn_flags &= ~DT_NF_COOKED; + dnp->dn_ident = idp; + dnp->dn_args = args; + dnp->dn_list = NULL; + + return (dnp); +} + +/* + * The offsetof() function is special because it takes a type name as an + * argument. It does not actually construct its own node; after looking up the + * structure or union offset, we just return an integer node with the offset. 
+ */ +dt_node_t * +dt_node_offsetof(dt_decl_t *ddp, char *s) +{ + dtrace_typeinfo_t dtt; + dt_node_t dn; + char *name; + int err; + + ctf_membinfo_t ctm; + ctf_id_t type; + uint_t kind; + + name = alloca(strlen(s) + 1); + (void) strcpy(name, s); + free(s); + + err = dt_decl_type(ddp, &dtt); + dt_decl_free(ddp); + + if (err != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + + type = ctf_type_resolve(dtt.dtt_ctfp, dtt.dtt_type); + kind = ctf_type_kind(dtt.dtt_ctfp, type); + + if (kind != CTF_K_STRUCT && kind != CTF_K_UNION) { + xyerror(D_OFFSETOF_TYPE, + "offsetof operand must be a struct or union type\n"); + } + + if (ctf_member_info(dtt.dtt_ctfp, type, name, &ctm) == CTF_ERR) { + xyerror(D_UNKNOWN, "failed to determine offset of %s: %s\n", + name, ctf_errmsg(ctf_errno(dtt.dtt_ctfp))); + } + + bzero(&dn, sizeof (dn)); + dt_node_type_assign(&dn, dtt.dtt_ctfp, ctm.ctm_type, B_FALSE); + + if (dn.dn_flags & DT_NF_BITFIELD) { + xyerror(D_OFFSETOF_BITFIELD, + "cannot take offset of a bit-field: %s\n", name); + } + + return (dt_node_int(ctm.ctm_offset / NBBY)); +} + +dt_node_t * +dt_node_op1(int op, dt_node_t *cp) +{ + dt_node_t *dnp; + + if (cp->dn_kind == DT_NODE_INT) { + switch (op) { + case DT_TOK_INEG: + /* + * If we're negating an unsigned integer, zero out any + * extra top bits to truncate the value to the size of + * the effective type determined by dt_node_int(). + */ + cp->dn_value = -cp->dn_value; + if (!(cp->dn_flags & DT_NF_SIGNED)) { + cp->dn_value &= ~0ULL >> + (64 - dt_node_type_size(cp) * NBBY); + } + /*FALLTHRU*/ + case DT_TOK_IPOS: + return (cp); + case DT_TOK_BNEG: + cp->dn_value = ~cp->dn_value; + return (cp); + case DT_TOK_LNEG: + cp->dn_value = !cp->dn_value; + return (cp); + } + } + + /* + * If sizeof is applied to a type_name or string constant, we can + * transform 'cp' into an integer constant in the node construction + * pass so that it can then be used for arithmetic in this pass. 
+	 */
+	if (op == DT_TOK_SIZEOF &&
+	    (cp->dn_kind == DT_NODE_STRING || cp->dn_kind == DT_NODE_TYPE)) {
+		dtrace_hdl_t *dtp = yypcb->pcb_hdl;
+		size_t size = dt_node_type_size(cp);
+
+		if (size == 0) {
+			xyerror(D_SIZEOF_TYPE, "cannot apply sizeof to an "
+			    "operand of unknown size\n");
+		}
+
+		dt_node_type_assign(cp, dtp->dt_ddefs->dm_ctfp,
+		    ctf_lookup_by_name(dtp->dt_ddefs->dm_ctfp, "size_t"),
+		    B_FALSE);
+
+		cp->dn_kind = DT_NODE_INT;
+		cp->dn_op = DT_TOK_INT;
+		cp->dn_value = size;
+
+		return (cp);
+	}
+
+	dnp = dt_node_alloc(DT_NODE_OP1);
+	assert(op <= USHRT_MAX);
+	dnp->dn_op = (ushort_t)op;
+	dnp->dn_child = cp;
+
+	return (dnp);
+}
+
+/*
+ * If an integer constant is being cast to another integer type, we can
+ * perform the cast as part of integer constant folding in this pass. We must
+ * take action when the integer is being cast to a smaller type or if it is
+ * changing signed-ness. If so, we first shift rp's bits high (losing
+ * excess bits if narrowing) and then shift them down with either a logical
+ * shift (unsigned) or arithmetic shift (signed).
+ *
+ * lp supplies the destination type and rp is the integer constant whose
+ * dn_value is rewritten in place; both sizes come from dt_node_type_size().
+ * When narrowing, the value is shifted up and back down by the same count
+ * with no explicit signed cast. When widening, the shift back down goes
+ * through an (intmax_t) cast only if rp has DT_NF_SIGNED set.
+ *
+ * NOTE(review): when the two sizes are equal neither branch runs, so a cast
+ * that only changes signed-ness leaves dn_value untouched -- confirm that
+ * this matches the intent described above.
+ */
+static void
+dt_cast(dt_node_t *lp, dt_node_t *rp)
+{
+	size_t srcsize = dt_node_type_size(rp);
+	size_t dstsize = dt_node_type_size(lp);
+
+	if (dstsize < srcsize) {
+		int n = (sizeof (uint64_t) - dstsize) * NBBY;
+		rp->dn_value <<= n;
+		rp->dn_value >>= n;
+	} else if (dstsize > srcsize) {
+		int n = (sizeof (uint64_t) - srcsize) * NBBY;
+		int s = (dstsize - srcsize) * NBBY;
+
+		rp->dn_value <<= n;
+		if (rp->dn_flags & DT_NF_SIGNED) {
+			rp->dn_value = (intmax_t)rp->dn_value >> s;
+			rp->dn_value >>= n - s;
+		} else {
+			rp->dn_value >>= n;
+		}
+	}
+}
+
+dt_node_t *
+dt_node_op2(int op, dt_node_t *lp, dt_node_t *rp)
+{
+	dtrace_hdl_t *dtp = yypcb->pcb_hdl;
+	dt_node_t *dnp;
+
+	/*
+	 * First we check for operations that are illegal -- namely those that
+	 * might result in integer division by zero, and abort if one is found.
+ */ + if (rp->dn_kind == DT_NODE_INT && rp->dn_value == 0 && + (op == DT_TOK_MOD || op == DT_TOK_DIV || + op == DT_TOK_MOD_EQ || op == DT_TOK_DIV_EQ)) + xyerror(D_DIV_ZERO, "expression contains division by zero\n"); + + /* + * If both children are immediate values, we can just perform inline + * calculation and return a new immediate node with the result. + */ + if (lp->dn_kind == DT_NODE_INT && rp->dn_kind == DT_NODE_INT) { + uintmax_t l = lp->dn_value; + uintmax_t r = rp->dn_value; + + dnp = dt_node_int(0); /* allocate new integer node for result */ + + switch (op) { + case DT_TOK_LOR: + dnp->dn_value = l || r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_LXOR: + dnp->dn_value = (l != 0) ^ (r != 0); + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_LAND: + dnp->dn_value = l && r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_BOR: + dnp->dn_value = l | r; + dt_node_promote(lp, rp, dnp); + break; + case DT_TOK_XOR: + dnp->dn_value = l ^ r; + dt_node_promote(lp, rp, dnp); + break; + case DT_TOK_BAND: + dnp->dn_value = l & r; + dt_node_promote(lp, rp, dnp); + break; + case DT_TOK_EQU: + dnp->dn_value = l == r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_NEQ: + dnp->dn_value = l != r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_LT: + dt_node_promote(lp, rp, dnp); + if (dnp->dn_flags & DT_NF_SIGNED) + dnp->dn_value = (intmax_t)l < (intmax_t)r; + else + dnp->dn_value = l < r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_LE: + dt_node_promote(lp, rp, dnp); + if (dnp->dn_flags & DT_NF_SIGNED) + dnp->dn_value = (intmax_t)l <= (intmax_t)r; + else + dnp->dn_value = l <= r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case 
DT_TOK_GT: + dt_node_promote(lp, rp, dnp); + if (dnp->dn_flags & DT_NF_SIGNED) + dnp->dn_value = (intmax_t)l > (intmax_t)r; + else + dnp->dn_value = l > r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_GE: + dt_node_promote(lp, rp, dnp); + if (dnp->dn_flags & DT_NF_SIGNED) + dnp->dn_value = (intmax_t)l >= (intmax_t)r; + else + dnp->dn_value = l >= r; + dt_node_type_assign(dnp, + DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), B_FALSE); + break; + case DT_TOK_LSH: + dnp->dn_value = l << r; + dt_node_type_propagate(lp, dnp); + dt_node_attr_assign(rp, + dt_attr_min(lp->dn_attr, rp->dn_attr)); + break; + case DT_TOK_RSH: + dnp->dn_value = l >> r; + dt_node_type_propagate(lp, dnp); + dt_node_attr_assign(rp, + dt_attr_min(lp->dn_attr, rp->dn_attr)); + break; + case DT_TOK_ADD: + dnp->dn_value = l + r; + dt_node_promote(lp, rp, dnp); + break; + case DT_TOK_SUB: + dnp->dn_value = l - r; + dt_node_promote(lp, rp, dnp); + break; + case DT_TOK_MUL: + dnp->dn_value = l * r; + dt_node_promote(lp, rp, dnp); + break; + case DT_TOK_DIV: + dt_node_promote(lp, rp, dnp); + if (dnp->dn_flags & DT_NF_SIGNED) + dnp->dn_value = (intmax_t)l / (intmax_t)r; + else + dnp->dn_value = l / r; + break; + case DT_TOK_MOD: + dt_node_promote(lp, rp, dnp); + if (dnp->dn_flags & DT_NF_SIGNED) + dnp->dn_value = (intmax_t)l % (intmax_t)r; + else + dnp->dn_value = l % r; + break; + default: + dt_node_free(dnp); + dnp = NULL; + } + + if (dnp != NULL) { + dt_node_free(lp); + dt_node_free(rp); + return (dnp); + } + } + + if (op == DT_TOK_LPAR && rp->dn_kind == DT_NODE_INT && + dt_node_is_integer(lp)) { + dt_cast(lp, rp); + dt_node_type_propagate(lp, rp); + dt_node_attr_assign(rp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + dt_node_free(lp); + + return (rp); + } + + /* + * If no immediate optimizations are available, create an new OP2 node + * and glue the left and right children into place and return. 
+ */ + dnp = dt_node_alloc(DT_NODE_OP2); + assert(op <= USHRT_MAX); + dnp->dn_op = (ushort_t)op; + dnp->dn_left = lp; + dnp->dn_right = rp; + + return (dnp); +} + +dt_node_t * +dt_node_op3(dt_node_t *expr, dt_node_t *lp, dt_node_t *rp) +{ + dt_node_t *dnp; + + if (expr->dn_kind == DT_NODE_INT) + return (expr->dn_value != 0 ? lp : rp); + + dnp = dt_node_alloc(DT_NODE_OP3); + dnp->dn_op = DT_TOK_QUESTION; + dnp->dn_expr = expr; + dnp->dn_left = lp; + dnp->dn_right = rp; + + return (dnp); +} + +dt_node_t * +dt_node_statement(dt_node_t *expr) +{ + dt_node_t *dnp; + + if (expr->dn_kind == DT_NODE_AGG) + return (expr); + + if (expr->dn_kind == DT_NODE_FUNC && + expr->dn_ident->di_kind == DT_IDENT_ACTFUNC) + dnp = dt_node_alloc(DT_NODE_DFUNC); + else + dnp = dt_node_alloc(DT_NODE_DEXPR); + + dnp->dn_expr = expr; + return (dnp); +} + +dt_node_t * +dt_node_pdesc_by_name(char *spec) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_node_t *dnp; + + if (spec == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dnp = dt_node_alloc(DT_NODE_PDESC); + dnp->dn_spec = spec; + dnp->dn_desc = malloc(sizeof (dtrace_probedesc_t)); + + if (dnp->dn_desc == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (dtrace_xstr2desc(dtp, yypcb->pcb_pspec, dnp->dn_spec, + yypcb->pcb_sargc, yypcb->pcb_sargv, dnp->dn_desc) != 0) { + xyerror(D_PDESC_INVAL, "invalid probe description \"%s\": %s\n", + dnp->dn_spec, dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + free(dnp->dn_spec); + dnp->dn_spec = NULL; + + return (dnp); +} + +dt_node_t * +dt_node_pdesc_by_id(uintmax_t id) +{ + static const char *const names[] = { + "providers", "modules", "functions" + }; + + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_node_t *dnp = dt_node_alloc(DT_NODE_PDESC); + + if ((dnp->dn_desc = malloc(sizeof (dtrace_probedesc_t))) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (id > UINT_MAX) { + xyerror(D_PDESC_INVAL, "identifier %llu exceeds maximum " + "probe id\n", (u_longlong_t)id); + } + + if (yypcb->pcb_pspec != 
DTRACE_PROBESPEC_NAME) { + xyerror(D_PDESC_INVAL, "probe identifier %llu not permitted " + "when specifying %s\n", (u_longlong_t)id, + names[yypcb->pcb_pspec]); + } + + if (dtrace_id2desc(dtp, (dtrace_id_t)id, dnp->dn_desc) != 0) { + xyerror(D_PDESC_INVAL, "invalid probe identifier %llu: %s\n", + (u_longlong_t)id, dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + return (dnp); +} + +dt_node_t * +dt_node_clause(dt_node_t *pdescs, dt_node_t *pred, dt_node_t *acts) +{ + dt_node_t *dnp = dt_node_alloc(DT_NODE_CLAUSE); + + dnp->dn_pdescs = pdescs; + dnp->dn_pred = pred; + dnp->dn_acts = acts; + + yybegin(YYS_CLAUSE); + return (dnp); +} + +dt_node_t * +dt_node_inline(dt_node_t *expr) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_scope_t *dsp = &yypcb->pcb_dstack; + dt_decl_t *ddp = dt_decl_top(); + + char n[DT_TYPE_NAMELEN]; + dtrace_typeinfo_t dtt; + + dt_ident_t *idp, *rdp; + dt_idnode_t *inp; + dt_node_t *dnp; + + if (dt_decl_type(ddp, &dtt) != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + + if (dsp->ds_class != DT_DC_DEFAULT) { + xyerror(D_DECL_BADCLASS, "specified storage class not " + "appropriate for inline declaration\n"); + } + + if (dsp->ds_ident == NULL) + xyerror(D_DECL_USELESS, "inline declaration requires a name\n"); + + if ((idp = dt_idstack_lookup( + &yypcb->pcb_globals, dsp->ds_ident)) != NULL) { + xyerror(D_DECL_IDRED, "identifier redefined: %s\n\t current: " + "inline definition\n\tprevious: %s %s\n", + idp->di_name, dt_idkind_name(idp->di_kind), + (idp->di_flags & DT_IDFLG_INLINE) ? "inline" : ""); + } + + /* + * If we are declaring an inlined array, verify that we have a tuple + * signature, and then recompute 'dtt' as the array's value type. 
+ */ + if (ddp->dd_kind == CTF_K_ARRAY) { + if (ddp->dd_node == NULL) { + xyerror(D_DECL_ARRNULL, "inline declaration requires " + "array tuple signature: %s\n", dsp->ds_ident); + } + + if (ddp->dd_node->dn_kind != DT_NODE_TYPE) { + xyerror(D_DECL_ARRNULL, "inline declaration cannot be " + "of scalar array type: %s\n", dsp->ds_ident); + } + + if (dt_decl_type(ddp->dd_next, &dtt) != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + } + + /* + * If the inline identifier is not defined, then create it with the + * orphan flag set. We do not insert the identifier into dt_globals + * until we have successfully cooked the right-hand expression, below. + */ + dnp = dt_node_alloc(DT_NODE_INLINE); + dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); + dt_node_attr_assign(dnp, _dtrace_defattr); + + if (dt_node_is_void(dnp)) { + xyerror(D_DECL_VOIDOBJ, + "cannot declare void inline: %s\n", dsp->ds_ident); + } + + if (ctf_type_kind(dnp->dn_ctfp, ctf_type_resolve( + dnp->dn_ctfp, dnp->dn_type)) == CTF_K_FORWARD) { + xyerror(D_DECL_INCOMPLETE, + "incomplete struct/union/enum %s: %s\n", + dt_node_type_name(dnp, n, sizeof (n)), dsp->ds_ident); + } + + if ((inp = malloc(sizeof (dt_idnode_t))) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + bzero(inp, sizeof (dt_idnode_t)); + + idp = dnp->dn_ident = dt_ident_create(dsp->ds_ident, + ddp->dd_kind == CTF_K_ARRAY ? DT_IDENT_ARRAY : DT_IDENT_SCALAR, + DT_IDFLG_INLINE | DT_IDFLG_REF | DT_IDFLG_DECL | DT_IDFLG_ORPHAN, 0, + _dtrace_defattr, 0, &dt_idops_inline, inp, dtp->dt_gen); + + if (idp == NULL) { + free(inp); + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + /* + * If we're inlining an associative array, create a private identifier + * hash containing the named parameters and store it in inp->din_hash. + * We then push this hash on to the top of the pcb_globals stack. 
+ */ + if (ddp->dd_kind == CTF_K_ARRAY) { + dt_idnode_t *pinp; + dt_ident_t *pidp; + dt_node_t *pnp; + uint_t i = 0; + + for (pnp = ddp->dd_node; pnp != NULL; pnp = pnp->dn_list) + i++; /* count up parameters for din_argv[] */ + + inp->din_hash = dt_idhash_create("inline args", NULL, 0, 0); + inp->din_argv = calloc(i, sizeof (dt_ident_t *)); + + if (inp->din_hash == NULL || inp->din_argv == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * Create an identifier for each parameter as a scalar inline, + * and store it in din_hash and in position in din_argv[]. The + * parameter identifiers also use dt_idops_inline, but we leave + * the dt_idnode_t argument 'pinp' zeroed. This will be filled + * in by the code generation pass with references to the args. + */ + for (i = 0, pnp = ddp->dd_node; + pnp != NULL; pnp = pnp->dn_list, i++) { + + if (pnp->dn_string == NULL) + continue; /* ignore anonymous parameters */ + + if ((pinp = malloc(sizeof (dt_idnode_t))) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + pidp = dt_idhash_insert(inp->din_hash, pnp->dn_string, + DT_IDENT_SCALAR, DT_IDFLG_DECL | DT_IDFLG_INLINE, 0, + _dtrace_defattr, 0, &dt_idops_inline, + pinp, dtp->dt_gen); + + if (pidp == NULL) { + free(pinp); + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + } + + inp->din_argv[i] = pidp; + bzero(pinp, sizeof (dt_idnode_t)); + dt_ident_type_assign(pidp, pnp->dn_ctfp, pnp->dn_type); + } + + dt_idstack_push(&yypcb->pcb_globals, inp->din_hash); + } + + /* + * Unlike most constructors, we need to explicitly cook the right-hand + * side of the inline definition immediately to prevent recursion. If + * the right-hand side uses the inline itself, the cook will fail. + */ + expr = dt_node_cook(expr, DT_IDFLG_REF); + + if (ddp->dd_kind == CTF_K_ARRAY) + dt_idstack_pop(&yypcb->pcb_globals, inp->din_hash); + + /* + * Set the type, attributes, and flags for the inline. If the right- + * hand expression has an identifier, propagate its flags. 
Then cook + * the identifier to fully initialize it: if we're declaring an inline + * associative array this will construct a type signature from 'ddp'. + */ + if (dt_node_is_dynamic(expr)) + rdp = dt_ident_resolve(expr->dn_ident); + else if (expr->dn_kind == DT_NODE_VAR || expr->dn_kind == DT_NODE_SYM) + rdp = expr->dn_ident; + else + rdp = NULL; + + if (rdp != NULL) { + idp->di_flags |= (rdp->di_flags & + (DT_IDFLG_WRITE | DT_IDFLG_USER | DT_IDFLG_PRIM)); + } + + idp->di_attr = dt_attr_min(_dtrace_defattr, expr->dn_attr); + dt_ident_type_assign(idp, dtt.dtt_ctfp, dtt.dtt_type); + (void) dt_ident_cook(dnp, idp, &ddp->dd_node); + + /* + * Store the parse tree nodes for 'expr' inside of idp->di_data ('inp') + * so that they will be preserved with this identifier. Then pop the + * inline declaration from the declaration stack and restore the lexer. + */ + inp->din_list = yypcb->pcb_list; + inp->din_root = expr; + + dt_decl_free(dt_decl_pop()); + yybegin(YYS_CLAUSE); + + /* + * Finally, insert the inline identifier into dt_globals to make it + * visible, and then cook 'dnp' to check its type against 'expr'. 
+ */ + dt_idhash_xinsert(dtp->dt_globals, idp); + return (dt_node_cook(dnp, DT_IDFLG_REF)); +} + +dt_node_t * +dt_node_member(dt_decl_t *ddp, char *name, dt_node_t *expr) +{ + dtrace_typeinfo_t dtt; + dt_node_t *dnp; + int err; + + if (ddp != NULL) { + err = dt_decl_type(ddp, &dtt); + dt_decl_free(ddp); + + if (err != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + } + + dnp = dt_node_alloc(DT_NODE_MEMBER); + dnp->dn_membname = name; + dnp->dn_membexpr = expr; + + if (ddp != NULL) + dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, + dtt.dtt_flags); + + return (dnp); +} + +dt_node_t * +dt_node_xlator(dt_decl_t *ddp, dt_decl_t *sdp, char *name, dt_node_t *members) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dtrace_typeinfo_t src, dst; + dt_node_t sn, dn; + dt_xlator_t *dxp; + dt_node_t *dnp; + int edst, esrc; + uint_t kind; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + edst = dt_decl_type(ddp, &dst); + dt_decl_free(ddp); + + esrc = dt_decl_type(sdp, &src); + dt_decl_free(sdp); + + if (edst != 0 || esrc != 0) { + free(name); + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); + } + + bzero(&sn, sizeof (sn)); + dt_node_type_assign(&sn, src.dtt_ctfp, src.dtt_type, B_FALSE); + + bzero(&dn, sizeof (dn)); + dt_node_type_assign(&dn, dst.dtt_ctfp, dst.dtt_type, B_FALSE); + + if (dt_xlator_lookup(dtp, &sn, &dn, DT_XLATE_EXACT) != NULL) { + xyerror(D_XLATE_REDECL, + "translator from %s to %s has already been declared\n", + dt_node_type_name(&sn, n1, sizeof (n1)), + dt_node_type_name(&dn, n2, sizeof (n2))); + } + + kind = ctf_type_kind(dst.dtt_ctfp, + ctf_type_resolve(dst.dtt_ctfp, dst.dtt_type)); + + if (kind == CTF_K_FORWARD) { + xyerror(D_XLATE_SOU, "incomplete struct/union/enum %s\n", + dt_type_name(dst.dtt_ctfp, dst.dtt_type, n1, sizeof (n1))); + } + + if (kind != CTF_K_STRUCT && kind != CTF_K_UNION) { + xyerror(D_XLATE_SOU, + "translator output type must be a struct or union\n"); + } + + dxp = dt_xlator_create(dtp, &src, &dst, name, members, yypcb->pcb_list); 
+ yybegin(YYS_CLAUSE); + free(name); + + if (dxp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + dnp = dt_node_alloc(DT_NODE_XLATOR); + dnp->dn_xlator = dxp; + dnp->dn_members = members; + + return (dt_node_cook(dnp, DT_IDFLG_REF)); +} + +dt_node_t * +dt_node_probe(char *s, int protoc, dt_node_t *nargs, dt_node_t *xargs) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + int nargc, xargc; + dt_node_t *dnp; + + size_t len = strlen(s) + 3; /* +3 for :: and \0 */ + char *name = alloca(len); + + (void) snprintf(name, len, "::%s", s); + (void) strhyphenate(name); + free(s); + + if (strchr(name, '`') != NULL) { + xyerror(D_PROV_BADNAME, "probe name may not " + "contain scoping operator: %s\n", name); + } + + if (strlen(name) - 2 >= DTRACE_NAMELEN) { + xyerror(D_PROV_BADNAME, "probe name may not exceed %d " + "characters: %s\n", DTRACE_NAMELEN - 1, name); + } + + dnp = dt_node_alloc(DT_NODE_PROBE); + + dnp->dn_ident = dt_ident_create(name, DT_IDENT_PROBE, + DT_IDFLG_ORPHAN, DTRACE_IDNONE, _dtrace_defattr, 0, + &dt_idops_probe, NULL, dtp->dt_gen); + + nargc = dt_decl_prototype(nargs, nargs, + "probe input", DT_DP_VOID | DT_DP_ANON); + + xargc = dt_decl_prototype(xargs, nargs, + "probe output", DT_DP_VOID); + + if (nargc > UINT8_MAX) { + xyerror(D_PROV_PRARGLEN, "probe %s input prototype exceeds %u " + "parameters: %d params used\n", name, UINT8_MAX, nargc); + } + + if (xargc > UINT8_MAX) { + xyerror(D_PROV_PRARGLEN, "probe %s output prototype exceeds %u " + "parameters: %d params used\n", name, UINT8_MAX, xargc); + } + + if (dnp->dn_ident == NULL || dt_probe_create(dtp, + dnp->dn_ident, protoc, nargs, nargc, xargs, xargc) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + return (dnp); +} + +dt_node_t * +dt_node_provider(char *name, dt_node_t *probes) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_node_t *dnp = dt_node_alloc(DT_NODE_PROVIDER); + dt_node_t *lnp; + size_t len; + + dnp->dn_provname = name; + dnp->dn_probes = probes; + + if (strchr(name, '`') != NULL) { + 
dnerror(dnp, D_PROV_BADNAME, "provider name may not " + "contain scoping operator: %s\n", name); + } + + if ((len = strlen(name)) >= DTRACE_PROVNAMELEN) { + dnerror(dnp, D_PROV_BADNAME, "provider name may not exceed %d " + "characters: %s\n", DTRACE_PROVNAMELEN - 1, name); + } + + if (isdigit(name[len - 1])) { + dnerror(dnp, D_PROV_BADNAME, "provider name may not " + "end with a digit: %s\n", name); + } + + /* + * Check to see if the provider is already defined or visible through + * dtrace(7D). If so, set dn_provred to treat it as a re-declaration. + * If not, create a new provider and set its interface-only flag. This + * flag may be cleared later by calls made to dt_probe_declare(). + */ + if ((dnp->dn_provider = dt_provider_lookup(dtp, name)) != NULL) + dnp->dn_provred = B_TRUE; + else if ((dnp->dn_provider = dt_provider_create(dtp, name)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + else + dnp->dn_provider->pv_flags |= DT_PROVIDER_INTF; + + /* + * Store all parse nodes created since we consumed the DT_KEY_PROVIDER + * token with the provider and then restore our lexing state to CLAUSE. + * Note that if dnp->dn_provred is true, we may end up storing dups of + * a provider's interface and implementation: we eat this space because + * the implementation will likely need to redeclare probe members, and + * therefore may result in those member nodes becoming persistent. 
+ */ + for (lnp = yypcb->pcb_list; lnp->dn_link != NULL; lnp = lnp->dn_link) + continue; /* skip to end of allocation list */ + + lnp->dn_link = dnp->dn_provider->pv_nodes; + dnp->dn_provider->pv_nodes = yypcb->pcb_list; + + yybegin(YYS_CLAUSE); + return (dnp); +} + +dt_node_t * +dt_node_program(dt_node_t *lnp) +{ + dt_node_t *dnp = dt_node_alloc(DT_NODE_PROG); + dnp->dn_list = lnp; + return (dnp); +} + +/* + * This function provides the underlying implementation of cooking an + * identifier given its node, a hash of dynamic identifiers, an identifier + * kind, and a boolean flag indicating whether we are allowed to instantiate + * a new identifier if the string is not found. This function is either + * called from dt_cook_ident(), below, or directly by the various cooking + * routines that are allowed to instantiate identifiers (e.g. op2 TOK_ASGN). + */ +static void +dt_xcook_ident(dt_node_t *dnp, dt_idhash_t *dhp, uint_t idkind, int create) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + const char *sname = dt_idhash_name(dhp); + int uref = 0; + + dtrace_attribute_t attr = _dtrace_defattr; + dt_ident_t *idp; + dtrace_syminfo_t dts; + GElf_Sym sym; + + const char *scope, *mark; + uchar_t dnkind; + char *name; + + /* + * Look for scoping marks in the identifier. If one is found, set our + * scope to either DTRACE_OBJ_KMODS or UMODS or to the first part of + * the string that specifies the scope using an explicit module name. + * If two marks in a row are found, set 'uref' (user symbol reference). + * Otherwise we set scope to DTRACE_OBJ_EXEC, indicating that normal + * scope is desired and we should search the specified idhash. + */ + if ((name = strrchr(dnp->dn_string, '`')) != NULL) { + if (name > dnp->dn_string && name[-1] == '`') { + uref++; + name[-1] = '\0'; + } + + if (name == dnp->dn_string + uref) + scope = uref ? 
DTRACE_OBJ_UMODS : DTRACE_OBJ_KMODS; + else + scope = dnp->dn_string; + + *name++ = '\0'; /* leave name pointing after scoping mark */ + dnkind = DT_NODE_VAR; + + } else if (idkind == DT_IDENT_AGG) { + scope = DTRACE_OBJ_EXEC; + name = dnp->dn_string + 1; + dnkind = DT_NODE_AGG; + } else { + scope = DTRACE_OBJ_EXEC; + name = dnp->dn_string; + dnkind = DT_NODE_VAR; + } + + /* + * If create is set to false, and we fail our idhash lookup, preset + * the errno code to EDT_NOVAR for our final error message below. + * If we end up calling dtrace_lookup_by_name(), it will reset the + * errno appropriately and that error will be reported instead. + */ + (void) dt_set_errno(dtp, EDT_NOVAR); + mark = uref ? "``" : "`"; + + if (scope == DTRACE_OBJ_EXEC && ( + (dhp != dtp->dt_globals && + (idp = dt_idhash_lookup(dhp, name)) != NULL) || + (dhp == dtp->dt_globals && + (idp = dt_idstack_lookup(&yypcb->pcb_globals, name)) != NULL))) { + /* + * Check that we are referencing the ident in the manner that + * matches its type if this is a global lookup. In the TLS or + * local case, we don't know how the ident will be used until + * the time operator -> is seen; more parsing is needed. + */ + if (idp->di_kind != idkind && dhp == dtp->dt_globals) { + xyerror(D_IDENT_BADREF, "%s '%s' may not be referenced " + "as %s\n", dt_idkind_name(idp->di_kind), + idp->di_name, dt_idkind_name(idkind)); + } + + /* + * Arrays and aggregations are not cooked individually. They + * have dynamic types and must be referenced using operator []. + * This is handled explicitly by the code for DT_TOK_LBRAC. 
+ */ + if (idp->di_kind != DT_IDENT_ARRAY && + idp->di_kind != DT_IDENT_AGG) + attr = dt_ident_cook(dnp, idp, NULL); + else { + dt_node_type_assign(dnp, + DT_DYN_CTFP(dtp), DT_DYN_TYPE(dtp), B_FALSE); + attr = idp->di_attr; + } + + free(dnp->dn_string); + dnp->dn_string = NULL; + dnp->dn_kind = dnkind; + dnp->dn_ident = idp; + dnp->dn_flags |= DT_NF_LVALUE; + + if (idp->di_flags & DT_IDFLG_WRITE) + dnp->dn_flags |= DT_NF_WRITABLE; + + dt_node_attr_assign(dnp, attr); + + } else if (dhp == dtp->dt_globals && scope != DTRACE_OBJ_EXEC && + dtrace_lookup_by_name(dtp, scope, name, &sym, &dts) == 0) { + + dt_module_t *mp = dt_module_lookup_by_name(dtp, dts.dts_object); + int umod = (mp->dm_flags & DT_DM_KERNEL) == 0; + static const char *const kunames[] = { "kernel", "user" }; + + dtrace_typeinfo_t dtt; + dtrace_syminfo_t *sip; + + if (uref ^ umod) { + xyerror(D_SYM_BADREF, "%s module '%s' symbol '%s' may " + "not be referenced as a %s symbol\n", kunames[umod], + dts.dts_object, dts.dts_name, kunames[uref]); + } + + if (dtrace_symbol_type(dtp, &sym, &dts, &dtt) != 0) { + /* + * For now, we special-case EDT_DATAMODEL to clarify + * that mixed data models are not currently supported. 
+ */ + if (dtp->dt_errno == EDT_DATAMODEL) { + xyerror(D_SYM_MODEL, "cannot use %s symbol " + "%s%s%s in a %s D program\n", + dt_module_modelname(mp), + dts.dts_object, mark, dts.dts_name, + dt_module_modelname(dtp->dt_ddefs)); + } + + xyerror(D_SYM_NOTYPES, + "no symbolic type information is available for " + "%s%s%s: %s\n", dts.dts_object, mark, dts.dts_name, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + + idp = dt_ident_create(name, DT_IDENT_SYMBOL, 0, 0, + _dtrace_symattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen); + + if (idp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (mp->dm_flags & DT_DM_PRIMARY) + idp->di_flags |= DT_IDFLG_PRIM; + + idp->di_next = dtp->dt_externs; + dtp->dt_externs = idp; + + if ((sip = malloc(sizeof (dtrace_syminfo_t))) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + bcopy(&dts, sip, sizeof (dtrace_syminfo_t)); + idp->di_data = sip; + idp->di_ctfp = dtt.dtt_ctfp; + idp->di_type = dtt.dtt_type; + + free(dnp->dn_string); + dnp->dn_string = NULL; + dnp->dn_kind = DT_NODE_SYM; + dnp->dn_ident = idp; + dnp->dn_flags |= DT_NF_LVALUE; + + dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, + dtt.dtt_flags); + dt_node_attr_assign(dnp, _dtrace_symattr); + + if (uref) { + idp->di_flags |= DT_IDFLG_USER; + dnp->dn_flags |= DT_NF_USERLAND; + } + + } else if (scope == DTRACE_OBJ_EXEC && create == B_TRUE) { + uint_t flags = DT_IDFLG_WRITE; + uint_t id; + + if (dt_idhash_nextid(dhp, &id) == -1) { + xyerror(D_ID_OFLOW, "cannot create %s: limit on number " + "of %s variables exceeded\n", name, sname); + } + + if (dhp == yypcb->pcb_locals) + flags |= DT_IDFLG_LOCAL; + else if (dhp == dtp->dt_tls) + flags |= DT_IDFLG_TLS; + + dt_dprintf("create %s %s variable %s, id=%u\n", + sname, dt_idkind_name(idkind), name, id); + + if (idkind == DT_IDENT_ARRAY || idkind == DT_IDENT_AGG) { + idp = dt_idhash_insert(dhp, name, + idkind, flags, id, _dtrace_defattr, 0, + &dt_idops_assc, NULL, dtp->dt_gen); + } else { + idp = dt_idhash_insert(dhp, name, + 
idkind, flags, id, _dtrace_defattr, 0, + &dt_idops_thaw, NULL, dtp->dt_gen); + } + + if (idp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + /* + * Arrays and aggregations are not cooked individually. They + * have dynamic types and must be referenced using operator []. + * This is handled explicitly by the code for DT_TOK_LBRAC. + */ + if (idp->di_kind != DT_IDENT_ARRAY && + idp->di_kind != DT_IDENT_AGG) + attr = dt_ident_cook(dnp, idp, NULL); + else { + dt_node_type_assign(dnp, + DT_DYN_CTFP(dtp), DT_DYN_TYPE(dtp), B_FALSE); + attr = idp->di_attr; + } + + free(dnp->dn_string); + dnp->dn_string = NULL; + dnp->dn_kind = dnkind; + dnp->dn_ident = idp; + dnp->dn_flags |= DT_NF_LVALUE | DT_NF_WRITABLE; + + dt_node_attr_assign(dnp, attr); + + } else if (scope != DTRACE_OBJ_EXEC) { + xyerror(D_IDENT_UNDEF, "failed to resolve %s%s%s: %s\n", + dnp->dn_string, mark, name, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + } else { + xyerror(D_IDENT_UNDEF, "failed to resolve %s: %s\n", + dnp->dn_string, dtrace_errmsg(dtp, dtrace_errno(dtp))); + } +} +/* + * Cook an identifier node. An identifier whose dn_op is DT_TOK_AGG is + * resolved against the aggregation hash (dt_aggs) as DT_IDENT_AGG; any + * other identifier is resolved against the globals as DT_IDENT_SCALAR. + * In both cases create is B_FALSE, so no new variable is instantiated + * here. The node is then passed to dt_node_cook() together with the + * caller idflags, and that result is returned. + */ +static dt_node_t * +dt_cook_ident(dt_node_t *dnp, uint_t idflags) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + if (dnp->dn_op == DT_TOK_AGG) + dt_xcook_ident(dnp, dtp->dt_aggs, DT_IDENT_AGG, B_FALSE); + else + dt_xcook_ident(dnp, dtp->dt_globals, DT_IDENT_SCALAR, B_FALSE); + + return (dt_node_cook(dnp, idflags)); +} + +/* + * Since operators [ and -> can instantiate new variables before we know + * whether the reference is for a read or a write, we need to check read + * references to determine if the identifier is currently dt_ident_unref(). + * If so, we report that this first access was to an undefined variable. + */ +static dt_node_t * +dt_cook_var(dt_node_t *dnp, uint_t idflags) +{ + dt_ident_t *idp = dnp->dn_ident; + + if ((idflags & DT_IDFLG_REF) && dt_ident_unref(idp)) { + dnerror(dnp, D_VAR_UNDEF, + "%s%s has not yet been declared or assigned\n", + (idp->di_flags & DT_IDFLG_LOCAL) ? 
"this->" : + (idp->di_flags & DT_IDFLG_TLS) ? "self->" : "", + idp->di_name); + } + + dt_node_attr_assign(dnp, dt_ident_cook(dnp, idp, &dnp->dn_args)); + return (dnp); +} +/* + * Cook a function node: cook the node identifier together with its + * argument list (dn_args) through dt_ident_cook() and assign the + * returned attributes to the node. The idflags argument is not used. + * Returns the same node. + */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_func(dt_node_t *dnp, uint_t idflags) +{ + dt_node_attr_assign(dnp, + dt_ident_cook(dnp, dnp->dn_ident, &dnp->dn_args)); + + return (dnp); +} +/* + * Cook a unary operator node. The incoming idflags value is ignored + * and recomputed from dn_op: DT_IDFLG_REF | DT_IDFLG_MOD for the four + * increment and decrement operators, DT_IDFLG_REF alone otherwise. + * The child is first cooked with no flags; a child that is an as yet + * unreferenced variable is given the type int64_t, and the computed + * flags are then recorded on the identifier of a variable child. + * Each case of the switch validates the operand, reporting problems + * through xyerror(), and assigns the result type and node flags for + * that operator. Finally the node takes the attributes of its child + * and is returned. + */ +static dt_node_t * +dt_cook_op1(dt_node_t *dnp, uint_t idflags) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_node_t *cp = dnp->dn_child; + + char n[DT_TYPE_NAMELEN]; + dtrace_typeinfo_t dtt; + dt_ident_t *idp; + + ctf_encoding_t e; + ctf_arinfo_t r; + ctf_id_t type, base; + uint_t kind; + + if (dnp->dn_op == DT_TOK_PREINC || dnp->dn_op == DT_TOK_POSTINC || + dnp->dn_op == DT_TOK_PREDEC || dnp->dn_op == DT_TOK_POSTDEC) + idflags = DT_IDFLG_REF | DT_IDFLG_MOD; + else + idflags = DT_IDFLG_REF; + + /* + * We allow the unary ++ and -- operators to instantiate new scalar + * variables if applied to an identifier; otherwise just cook as usual. + */ + if (cp->dn_kind == DT_NODE_IDENT && (idflags & DT_IDFLG_MOD)) + dt_xcook_ident(cp, dtp->dt_globals, DT_IDENT_SCALAR, B_TRUE); + + cp = dnp->dn_child = dt_node_cook(cp, 0); /* don't set idflags yet */ + + if (cp->dn_kind == DT_NODE_VAR && dt_ident_unref(cp->dn_ident)) { + if (dt_type_lookup("int64_t", &dtt) != 0) + xyerror(D_TYPE_ERR, "failed to lookup int64_t\n"); + + dt_ident_type_assign(cp->dn_ident, dtt.dtt_ctfp, dtt.dtt_type); + dt_node_type_assign(cp, dtt.dtt_ctfp, dtt.dtt_type, + dtt.dtt_flags); + } + + if (cp->dn_kind == DT_NODE_VAR) + cp->dn_ident->di_flags |= idflags; + + switch (dnp->dn_op) { + case DT_TOK_DEREF: + /* + * If the deref operator is applied to a translated pointer, + * we set our output type to the output of the translation. 
+ */ + if ((idp = dt_node_resolve(cp, DT_IDENT_XLPTR)) != NULL) { + dt_xlator_t *dxp = idp->di_data; + + dnp->dn_ident = &dxp->dx_souid; + dt_node_type_assign(dnp, + dnp->dn_ident->di_ctfp, dnp->dn_ident->di_type, + cp->dn_flags & DT_NF_USERLAND); + break; + } + + type = ctf_type_resolve(cp->dn_ctfp, cp->dn_type); + kind = ctf_type_kind(cp->dn_ctfp, type); + + if (kind == CTF_K_ARRAY) { + if (ctf_array_info(cp->dn_ctfp, type, &r) != 0) { + dtp->dt_ctferr = ctf_errno(cp->dn_ctfp); + longjmp(yypcb->pcb_jmpbuf, EDT_CTF); + } else + type = r.ctr_contents; + } else if (kind == CTF_K_POINTER) { + type = ctf_type_reference(cp->dn_ctfp, type); + } else { + xyerror(D_DEREF_NONPTR, + "cannot dereference non-pointer type\n"); + } + + dt_node_type_assign(dnp, cp->dn_ctfp, type, + cp->dn_flags & DT_NF_USERLAND); + base = ctf_type_resolve(cp->dn_ctfp, type); + kind = ctf_type_kind(cp->dn_ctfp, base); + + if (kind == CTF_K_INTEGER && ctf_type_encoding(cp->dn_ctfp, + base, &e) == 0 && IS_VOID(e)) { + xyerror(D_DEREF_VOID, + "cannot dereference pointer to void\n"); + } + + if (kind == CTF_K_FUNCTION) { + xyerror(D_DEREF_FUNC, + "cannot dereference pointer to function\n"); + } + + if (kind != CTF_K_ARRAY || dt_node_is_string(dnp)) + dnp->dn_flags |= DT_NF_LVALUE; /* see K&R[A7.4.3] */ + + /* + * If we propagated the l-value bit and the child operand was + * a writable D variable or a binary operation of the form + * a + b where a is writable, then propagate the writable bit. + * This is necessary to permit assignments to scalar arrays, + * which are converted to expressions of the form *(a + i). 
+ */ + if ((cp->dn_flags & DT_NF_WRITABLE) || + (cp->dn_kind == DT_NODE_OP2 && cp->dn_op == DT_TOK_ADD && + (cp->dn_left->dn_flags & DT_NF_WRITABLE))) + dnp->dn_flags |= DT_NF_WRITABLE; + + if ((cp->dn_flags & DT_NF_USERLAND) && + (kind == CTF_K_POINTER || (dnp->dn_flags & DT_NF_REF))) + dnp->dn_flags |= DT_NF_USERLAND; + break; + + case DT_TOK_IPOS: + case DT_TOK_INEG: + if (!dt_node_is_arith(cp)) { + xyerror(D_OP_ARITH, "operator %s requires an operand " + "of arithmetic type\n", opstr(dnp->dn_op)); + } + dt_node_type_propagate(cp, dnp); /* see K&R[A7.4.4-6] */ + break; + + case DT_TOK_BNEG: + if (!dt_node_is_integer(cp)) { + xyerror(D_OP_INT, "operator %s requires an operand of " + "integral type\n", opstr(dnp->dn_op)); + } + dt_node_type_propagate(cp, dnp); /* see K&R[A7.4.4-6] */ + break; + + case DT_TOK_LNEG: + if (!dt_node_is_scalar(cp)) { + xyerror(D_OP_SCALAR, "operator %s requires an operand " + "of scalar type\n", opstr(dnp->dn_op)); + } + dt_node_type_assign(dnp, DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), + B_FALSE); + break; + + case DT_TOK_ADDROF: + if (cp->dn_kind == DT_NODE_VAR || cp->dn_kind == DT_NODE_AGG) { + xyerror(D_ADDROF_VAR, + "cannot take address of dynamic variable\n"); + } + + if (dt_node_is_dynamic(cp)) { + xyerror(D_ADDROF_VAR, + "cannot take address of dynamic object\n"); + } + + if (!(cp->dn_flags & DT_NF_LVALUE)) { + xyerror(D_ADDROF_LVAL, /* see K&R[A7.4.2] */ + "unacceptable operand for unary & operator\n"); + } + + if (cp->dn_flags & DT_NF_BITFIELD) { + xyerror(D_ADDROF_BITFIELD, + "cannot take address of bit-field\n"); + } + + dtt.dtt_object = NULL; + dtt.dtt_ctfp = cp->dn_ctfp; + dtt.dtt_type = cp->dn_type; + + if (dt_type_pointer(&dtt) == -1) { + xyerror(D_TYPE_ERR, "cannot find type for \"&\": %s*\n", + dt_node_type_name(cp, n, sizeof (n))); + } + + dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, + cp->dn_flags & DT_NF_USERLAND); + break; + + case DT_TOK_SIZEOF: + if (cp->dn_flags & DT_NF_BITFIELD) { + 
xyerror(D_SIZEOF_BITFIELD, + "cannot apply sizeof to a bit-field\n"); + } + + if (dt_node_sizeof(cp) == 0) { + xyerror(D_SIZEOF_TYPE, "cannot apply sizeof to an " + "operand of unknown size\n"); + } + + dt_node_type_assign(dnp, dtp->dt_ddefs->dm_ctfp, + ctf_lookup_by_name(dtp->dt_ddefs->dm_ctfp, "size_t"), + B_FALSE); + break; + + case DT_TOK_STRINGOF: + if (!dt_node_is_scalar(cp) && !dt_node_is_pointer(cp) && + !dt_node_is_strcompat(cp)) { + xyerror(D_STRINGOF_TYPE, + "cannot apply stringof to a value of type %s\n", + dt_node_type_name(cp, n, sizeof (n))); + } + dt_node_type_assign(dnp, DT_STR_CTFP(dtp), DT_STR_TYPE(dtp), + cp->dn_flags & DT_NF_USERLAND); + break; + + case DT_TOK_PREINC: + case DT_TOK_POSTINC: + case DT_TOK_PREDEC: + case DT_TOK_POSTDEC: + if (dt_node_is_scalar(cp) == 0) { + xyerror(D_OP_SCALAR, "operator %s requires operand of " + "scalar type\n", opstr(dnp->dn_op)); + } + + if (dt_node_is_vfptr(cp)) { + xyerror(D_OP_VFPTR, "operator %s requires an operand " + "of known size\n", opstr(dnp->dn_op)); + } + + if (!(cp->dn_flags & DT_NF_LVALUE)) { + xyerror(D_OP_LVAL, "operator %s requires modifiable " + "lvalue as an operand\n", opstr(dnp->dn_op)); + } + + if (!(cp->dn_flags & DT_NF_WRITABLE)) { + xyerror(D_OP_WRITE, "operator %s can only be applied " + "to a writable variable\n", opstr(dnp->dn_op)); + } + + dt_node_type_propagate(cp, dnp); /* see K&R[A7.4.1] */ + break; + + default: + xyerror(D_UNKNOWN, "invalid unary op %s\n", opstr(dnp->dn_op)); + } + + dt_node_attr_assign(dnp, cp->dn_attr); + return (dnp); +} +/* + * Common tail shared by the assignment operators; dt_cook_op2() reaches + * it through its asgn_common label. When the right-hand operand is an + * integer constant node (DT_NODE_INT), dt_cast() is applied to the + * operand pair first. The left-hand operand must carry DT_NF_LVALUE + * and DT_NF_WRITABLE, otherwise an error is reported through xyerror(). + * The assignment node then takes the type of the left-hand operand and + * the dt_attr_min() of the attributes of both operands. + */ +static void +dt_assign_common(dt_node_t *dnp) +{ + dt_node_t *lp = dnp->dn_left; + dt_node_t *rp = dnp->dn_right; + int op = dnp->dn_op; + + if (rp->dn_kind == DT_NODE_INT) + dt_cast(lp, rp); + + if (!(lp->dn_flags & DT_NF_LVALUE)) { + xyerror(D_OP_LVAL, "operator %s requires modifiable " + "lvalue as an operand\n", opstr(op)); + /* see K&R[A7.17] */ + } + + if (!(lp->dn_flags & DT_NF_WRITABLE)) { + xyerror(D_OP_WRITE, "operator %s can 
only be applied " + "to a writable variable\n", opstr(op)); + } + + dt_node_type_propagate(lp, dnp); /* see K&R[A7.17] */ + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); +} + +static dt_node_t * +dt_cook_op2(dt_node_t *dnp, uint_t idflags) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_node_t *lp = dnp->dn_left; + dt_node_t *rp = dnp->dn_right; + int op = dnp->dn_op; + + ctf_membinfo_t m; + ctf_file_t *ctfp; + ctf_id_t type; + int kind, val, uref; + dt_ident_t *idp; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + /* + * The expression E1[E2] is identical by definition to *((E1)+(E2)) so + * we convert "[" to "+" and glue on "*" at the end (see K&R[A7.3.1]) + * unless the left-hand side is an untyped D scalar, associative array, + * or aggregation. In these cases, we proceed to case DT_TOK_LBRAC and + * handle associative array and aggregation references there. + */ + if (op == DT_TOK_LBRAC) { + if (lp->dn_kind == DT_NODE_IDENT) { + dt_idhash_t *dhp; + uint_t idkind; + + if (lp->dn_op == DT_TOK_AGG) { + dhp = dtp->dt_aggs; + idp = dt_idhash_lookup(dhp, lp->dn_string + 1); + idkind = DT_IDENT_AGG; + } else { + dhp = dtp->dt_globals; + idp = dt_idstack_lookup( + &yypcb->pcb_globals, lp->dn_string); + idkind = DT_IDENT_ARRAY; + } + + if (idp == NULL || dt_ident_unref(idp)) + dt_xcook_ident(lp, dhp, idkind, B_TRUE); + else + dt_xcook_ident(lp, dhp, idp->di_kind, B_FALSE); + } else + lp = dnp->dn_left = dt_node_cook(lp, 0); + + /* + * Switch op to '+' for *(E1 + E2) array mode in these cases: + * (a) lp is a DT_IDENT_ARRAY variable that has already been + * referenced using [] notation (dn_args != NULL). 
+ * (b) lp is a non-ARRAY variable that has already been given + * a type by assignment or declaration (!dt_ident_unref()) + * (c) lp is neither a variable nor an aggregation + */ + if (lp->dn_kind == DT_NODE_VAR) { + if (lp->dn_ident->di_kind == DT_IDENT_ARRAY) { + if (lp->dn_args != NULL) + op = DT_TOK_ADD; + } else if (!dt_ident_unref(lp->dn_ident)) + op = DT_TOK_ADD; + } else if (lp->dn_kind != DT_NODE_AGG) + op = DT_TOK_ADD; + } + + switch (op) { + case DT_TOK_BAND: + case DT_TOK_XOR: + case DT_TOK_BOR: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + if (!dt_node_is_integer(lp) || !dt_node_is_integer(rp)) { + xyerror(D_OP_INT, "operator %s requires operands of " + "integral type\n", opstr(op)); + } + + dt_node_promote(lp, rp, dnp); /* see K&R[A7.11-13] */ + break; + + case DT_TOK_LSH: + case DT_TOK_RSH: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + if (!dt_node_is_integer(lp) || !dt_node_is_integer(rp)) { + xyerror(D_OP_INT, "operator %s requires operands of " + "integral type\n", opstr(op)); + } + + dt_node_type_propagate(lp, dnp); /* see K&R[A7.8] */ + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + break; + + case DT_TOK_MOD: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + if (!dt_node_is_integer(lp) || !dt_node_is_integer(rp)) { + xyerror(D_OP_INT, "operator %s requires operands of " + "integral type\n", opstr(op)); + } + + dt_node_promote(lp, rp, dnp); /* see K&R[A7.6] */ + break; + + case DT_TOK_MUL: + case DT_TOK_DIV: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + if (!dt_node_is_arith(lp) || !dt_node_is_arith(rp)) { + xyerror(D_OP_ARITH, "operator %s requires operands of " + "arithmetic type\n", opstr(op)); + } + + dt_node_promote(lp, rp, dnp); /* see K&R[A7.6] */ + break; + 
+ case DT_TOK_LAND: + case DT_TOK_LXOR: + case DT_TOK_LOR: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + if (!dt_node_is_scalar(lp) || !dt_node_is_scalar(rp)) { + xyerror(D_OP_SCALAR, "operator %s requires operands " + "of scalar type\n", opstr(op)); + } + + dt_node_type_assign(dnp, DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), + B_FALSE); + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + break; + + case DT_TOK_LT: + case DT_TOK_LE: + case DT_TOK_GT: + case DT_TOK_GE: + case DT_TOK_EQU: + case DT_TOK_NEQ: + /* + * The D comparison operators provide the ability to transform + * a right-hand identifier into a corresponding enum tag value + * if the left-hand side is an enum type. To do this, we cook + * the left-hand side, and then see if the right-hand side is + * an unscoped identifier defined in the enum. If so, we + * convert into an integer constant node with the tag's value. + */ + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + + kind = ctf_type_kind(lp->dn_ctfp, + ctf_type_resolve(lp->dn_ctfp, lp->dn_type)); + + if (kind == CTF_K_ENUM && rp->dn_kind == DT_NODE_IDENT && + strchr(rp->dn_string, '`') == NULL && ctf_enum_value( + lp->dn_ctfp, lp->dn_type, rp->dn_string, &val) == 0) { + + if ((idp = dt_idstack_lookup(&yypcb->pcb_globals, + rp->dn_string)) != NULL) { + xyerror(D_IDENT_AMBIG, + "ambiguous use of operator %s: %s is " + "both a %s enum tag and a global %s\n", + opstr(op), rp->dn_string, + dt_node_type_name(lp, n1, sizeof (n1)), + dt_idkind_name(idp->di_kind)); + } + + free(rp->dn_string); + rp->dn_string = NULL; + rp->dn_kind = DT_NODE_INT; + rp->dn_flags |= DT_NF_COOKED; + rp->dn_op = DT_TOK_INT; + rp->dn_value = (intmax_t)val; + + dt_node_type_assign(rp, lp->dn_ctfp, lp->dn_type, + B_FALSE); + dt_node_attr_assign(rp, _dtrace_symattr); + } + + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + /* + * The rules for type checking for the relational operators are + * 
described in the ANSI-C spec (see K&R[A7.9-10]). We perform + * the various tests in order from least to most expensive. We + * also allow derived strings to be compared as a first-class + * type (resulting in a strcmp(3C)-style comparison), and we + * slightly relax the A7.9 rules to permit void pointer + * comparisons as in A7.10. Our users won't be confused by + * this since they understand pointers are just numbers, and + * relaxing this constraint simplifies the implementation. + */ + if (ctf_type_compat(lp->dn_ctfp, lp->dn_type, + rp->dn_ctfp, rp->dn_type)) + /*EMPTY*/; + else if (dt_node_is_integer(lp) && dt_node_is_integer(rp)) + /*EMPTY*/; + else if (dt_node_is_strcompat(lp) && dt_node_is_strcompat(rp) && + (dt_node_is_string(lp) || dt_node_is_string(rp))) + /*EMPTY*/; + else if (dt_node_is_ptrcompat(lp, rp, NULL, NULL) == 0) { + xyerror(D_OP_INCOMPAT, "operands have " + "incompatible types: \"%s\" %s \"%s\"\n", + dt_node_type_name(lp, n1, sizeof (n1)), opstr(op), + dt_node_type_name(rp, n2, sizeof (n2))); + } + + dt_node_type_assign(dnp, DT_INT_CTFP(dtp), DT_INT_TYPE(dtp), + B_FALSE); + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + break; + + case DT_TOK_ADD: + case DT_TOK_SUB: { + /* + * The rules for type checking for the additive operators are + * described in the ANSI-C spec (see K&R[A7.7]). Pointers and + * integers may be manipulated according to specific rules. In + * these cases D permits strings to be treated as pointers. 
+ */ + int lp_is_ptr, lp_is_int, rp_is_ptr, rp_is_int; + + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + lp_is_ptr = dt_node_is_string(lp) || + (dt_node_is_pointer(lp) && !dt_node_is_vfptr(lp)); + lp_is_int = dt_node_is_integer(lp); + + rp_is_ptr = dt_node_is_string(rp) || + (dt_node_is_pointer(rp) && !dt_node_is_vfptr(rp)); + rp_is_int = dt_node_is_integer(rp); + + if (lp_is_int && rp_is_int) { + dt_type_promote(lp, rp, &ctfp, &type); + uref = 0; + } else if (lp_is_ptr && rp_is_int) { + ctfp = lp->dn_ctfp; + type = lp->dn_type; + uref = lp->dn_flags & DT_NF_USERLAND; + } else if (lp_is_int && rp_is_ptr && op == DT_TOK_ADD) { + ctfp = rp->dn_ctfp; + type = rp->dn_type; + uref = rp->dn_flags & DT_NF_USERLAND; + } else if (lp_is_ptr && rp_is_ptr && op == DT_TOK_SUB && + dt_node_is_ptrcompat(lp, rp, NULL, NULL)) { + ctfp = dtp->dt_ddefs->dm_ctfp; + type = ctf_lookup_by_name(ctfp, "ptrdiff_t"); + uref = 0; + } else { + xyerror(D_OP_INCOMPAT, "operands have incompatible " + "types: \"%s\" %s \"%s\"\n", + dt_node_type_name(lp, n1, sizeof (n1)), opstr(op), + dt_node_type_name(rp, n2, sizeof (n2))); + } + + dt_node_type_assign(dnp, ctfp, type, B_FALSE); + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + + if (uref) + dnp->dn_flags |= DT_NF_USERLAND; + break; + } + + case DT_TOK_OR_EQ: + case DT_TOK_XOR_EQ: + case DT_TOK_AND_EQ: + case DT_TOK_LSH_EQ: + case DT_TOK_RSH_EQ: + case DT_TOK_MOD_EQ: + if (lp->dn_kind == DT_NODE_IDENT) { + dt_xcook_ident(lp, dtp->dt_globals, + DT_IDENT_SCALAR, B_TRUE); + } + + lp = dnp->dn_left = + dt_node_cook(lp, DT_IDFLG_REF | DT_IDFLG_MOD); + + rp = dnp->dn_right = + dt_node_cook(rp, DT_IDFLG_REF | DT_IDFLG_MOD); + + if (!dt_node_is_integer(lp) || !dt_node_is_integer(rp)) { + xyerror(D_OP_INT, "operator %s requires operands of " + "integral type\n", opstr(op)); + } + goto asgn_common; + + case DT_TOK_MUL_EQ: + case DT_TOK_DIV_EQ: + if (lp->dn_kind == 
DT_NODE_IDENT) { + dt_xcook_ident(lp, dtp->dt_globals, + DT_IDENT_SCALAR, B_TRUE); + } + + lp = dnp->dn_left = + dt_node_cook(lp, DT_IDFLG_REF | DT_IDFLG_MOD); + + rp = dnp->dn_right = + dt_node_cook(rp, DT_IDFLG_REF | DT_IDFLG_MOD); + + if (!dt_node_is_arith(lp) || !dt_node_is_arith(rp)) { + xyerror(D_OP_ARITH, "operator %s requires operands of " + "arithmetic type\n", opstr(op)); + } + goto asgn_common; + + case DT_TOK_ASGN: + /* + * If the left-hand side is an identifier, attempt to resolve + * it as either an aggregation or scalar variable. We pass + * B_TRUE to dt_xcook_ident to indicate that a new variable can + * be created if no matching variable exists in the namespace. + */ + if (lp->dn_kind == DT_NODE_IDENT) { + if (lp->dn_op == DT_TOK_AGG) { + dt_xcook_ident(lp, dtp->dt_aggs, + DT_IDENT_AGG, B_TRUE); + } else { + dt_xcook_ident(lp, dtp->dt_globals, + DT_IDENT_SCALAR, B_TRUE); + } + } + + lp = dnp->dn_left = dt_node_cook(lp, 0); /* don't set mod yet */ + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + /* + * If the left-hand side is an aggregation, verify that we are + * assigning it the result of an aggregating function. Once + * we've done so, hide the func node in the aggregation and + * return the aggregation itself up to the parse tree parent. + * This transformation is legal since the assigned function + * cannot change identity across disjoint cooking passes and + * the argument list subtree is retained for later cooking. 
+ */ + if (lp->dn_kind == DT_NODE_AGG) { + const char *aname = lp->dn_ident->di_name; + dt_ident_t *oid = lp->dn_ident->di_iarg; + + if (rp->dn_kind != DT_NODE_FUNC || + rp->dn_ident->di_kind != DT_IDENT_AGGFUNC) { + xyerror(D_AGG_FUNC, + "@%s must be assigned the result of " + "an aggregating function\n", aname); + } + + if (oid != NULL && oid != rp->dn_ident) { + xyerror(D_AGG_REDEF, + "aggregation redefined: @%s\n\t " + "current: @%s = %s( )\n\tprevious: @%s = " + "%s( ) : line %d\n", aname, aname, + rp->dn_ident->di_name, aname, oid->di_name, + lp->dn_ident->di_lineno); + } else if (oid == NULL) + lp->dn_ident->di_iarg = rp->dn_ident; + + /* + * Do not allow multiple aggregation assignments in a + * single statement, e.g. (@a = count()) = count(); + * We produce a message as if the result of aggregating + * function does not propagate DT_NF_LVALUE. + */ + if (lp->dn_aggfun != NULL) { + xyerror(D_OP_LVAL, "operator = requires " + "modifiable lvalue as an operand\n"); + } + + lp->dn_aggfun = rp; + lp = dt_node_cook(lp, DT_IDFLG_MOD); + + dnp->dn_left = dnp->dn_right = NULL; + dt_node_free(dnp); + + return (lp); + } + + /* + * If the right-hand side is a dynamic variable that is the + * output of a translator, our result is the translated type. + */ + if ((idp = dt_node_resolve(rp, DT_IDENT_XLSOU)) != NULL) { + ctfp = idp->di_ctfp; + type = idp->di_type; + uref = idp->di_flags & DT_IDFLG_USER; + } else { + ctfp = rp->dn_ctfp; + type = rp->dn_type; + uref = rp->dn_flags & DT_NF_USERLAND; + } + + /* + * If the left-hand side of an assignment statement is a virgin + * variable created by this compilation pass, reset the type of + * this variable to the type of the right-hand side. 
+ */ + if (lp->dn_kind == DT_NODE_VAR && + dt_ident_unref(lp->dn_ident)) { + dt_node_type_assign(lp, ctfp, type, B_FALSE); + dt_ident_type_assign(lp->dn_ident, ctfp, type); + + if (uref) { + lp->dn_flags |= DT_NF_USERLAND; + lp->dn_ident->di_flags |= DT_IDFLG_USER; + } + } + + if (lp->dn_kind == DT_NODE_VAR) + lp->dn_ident->di_flags |= DT_IDFLG_MOD; + + /* + * The rules for type checking for the assignment operators are + * described in the ANSI-C spec (see K&R[A7.17]). We share + * most of this code with the argument list checking code. + */ + if (!dt_node_is_string(lp)) { + kind = ctf_type_kind(lp->dn_ctfp, + ctf_type_resolve(lp->dn_ctfp, lp->dn_type)); + + if (kind == CTF_K_ARRAY || kind == CTF_K_FUNCTION) { + xyerror(D_OP_ARRFUN, "operator %s may not be " + "applied to operand of type \"%s\"\n", + opstr(op), + dt_node_type_name(lp, n1, sizeof (n1))); + } + } + + if (idp != NULL && idp->di_kind == DT_IDENT_XLSOU && + ctf_type_compat(lp->dn_ctfp, lp->dn_type, ctfp, type)) + goto asgn_common; + + if (dt_node_is_argcompat(lp, rp)) + goto asgn_common; + + xyerror(D_OP_INCOMPAT, + "operands have incompatible types: \"%s\" %s \"%s\"\n", + dt_node_type_name(lp, n1, sizeof (n1)), opstr(op), + dt_node_type_name(rp, n2, sizeof (n2))); + /*NOTREACHED*/ + + case DT_TOK_ADD_EQ: + case DT_TOK_SUB_EQ: + if (lp->dn_kind == DT_NODE_IDENT) { + dt_xcook_ident(lp, dtp->dt_globals, + DT_IDENT_SCALAR, B_TRUE); + } + + lp = dnp->dn_left = + dt_node_cook(lp, DT_IDFLG_REF | DT_IDFLG_MOD); + + rp = dnp->dn_right = + dt_node_cook(rp, DT_IDFLG_REF | DT_IDFLG_MOD); + + if (dt_node_is_string(lp) || dt_node_is_string(rp)) { + xyerror(D_OP_INCOMPAT, "operands have " + "incompatible types: \"%s\" %s \"%s\"\n", + dt_node_type_name(lp, n1, sizeof (n1)), opstr(op), + dt_node_type_name(rp, n2, sizeof (n2))); + } + + /* + * The rules for type checking for the assignment operators are + * described in the ANSI-C spec (see K&R[A7.17]). 
To these + * rules we add that only writable D nodes can be modified. + */ + if (dt_node_is_integer(lp) == 0 || + dt_node_is_integer(rp) == 0) { + if (!dt_node_is_pointer(lp) || dt_node_is_vfptr(lp)) { + xyerror(D_OP_VFPTR, + "operator %s requires left-hand scalar " + "operand of known size\n", opstr(op)); + } else if (dt_node_is_integer(rp) == 0 && + dt_node_is_ptrcompat(lp, rp, NULL, NULL) == 0) { + xyerror(D_OP_INCOMPAT, "operands have " + "incompatible types: \"%s\" %s \"%s\"\n", + dt_node_type_name(lp, n1, sizeof (n1)), + opstr(op), + dt_node_type_name(rp, n2, sizeof (n2))); + } + } +asgn_common: + dt_assign_common(dnp); + break; + + case DT_TOK_PTR: + /* + * If the left-hand side of operator -> is the name "self", + * then we permit a TLS variable to be created or referenced. + */ + if (lp->dn_kind == DT_NODE_IDENT && + strcmp(lp->dn_string, "self") == 0) { + if (rp->dn_kind != DT_NODE_VAR) { + dt_xcook_ident(rp, dtp->dt_tls, + DT_IDENT_SCALAR, B_TRUE); + } + + if (idflags != 0) + rp = dt_node_cook(rp, idflags); + + dnp->dn_right = dnp->dn_left; /* avoid freeing rp */ + dt_node_free(dnp); + return (rp); + } + + /* + * If the left-hand side of operator -> is the name "this", + * then we permit a local variable to be created or referenced. 
+ */ + if (lp->dn_kind == DT_NODE_IDENT && + strcmp(lp->dn_string, "this") == 0) { + if (rp->dn_kind != DT_NODE_VAR) { + dt_xcook_ident(rp, yypcb->pcb_locals, + DT_IDENT_SCALAR, B_TRUE); + } + + if (idflags != 0) + rp = dt_node_cook(rp, idflags); + + dnp->dn_right = dnp->dn_left; /* avoid freeing rp */ + dt_node_free(dnp); + return (rp); + } + + /*FALLTHRU*/ + + case DT_TOK_DOT: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + + if (rp->dn_kind != DT_NODE_IDENT) { + xyerror(D_OP_IDENT, "operator %s must be followed by " + "an identifier\n", opstr(op)); + } + + if ((idp = dt_node_resolve(lp, DT_IDENT_XLSOU)) != NULL || + (idp = dt_node_resolve(lp, DT_IDENT_XLPTR)) != NULL) { + /* + * If the left-hand side is a translated struct or ptr, + * the type of the left is the translation output type. + */ + dt_xlator_t *dxp = idp->di_data; + + if (dt_xlator_member(dxp, rp->dn_string) == NULL) { + xyerror(D_XLATE_NOCONV, + "translator does not define conversion " + "for member: %s\n", rp->dn_string); + } + + ctfp = idp->di_ctfp; + type = ctf_type_resolve(ctfp, idp->di_type); + uref = idp->di_flags & DT_IDFLG_USER; + } else { + ctfp = lp->dn_ctfp; + type = ctf_type_resolve(ctfp, lp->dn_type); + uref = lp->dn_flags & DT_NF_USERLAND; + } + + kind = ctf_type_kind(ctfp, type); + + if (op == DT_TOK_PTR) { + if (kind != CTF_K_POINTER) { + xyerror(D_OP_PTR, "operator %s must be " + "applied to a pointer\n", opstr(op)); + } + type = ctf_type_reference(ctfp, type); + type = ctf_type_resolve(ctfp, type); + kind = ctf_type_kind(ctfp, type); + } + + /* + * If we follow a reference to a forward declaration tag, + * search the entire type space for the actual definition. 
+ */ + while (kind == CTF_K_FORWARD) { + char *tag = ctf_type_name(ctfp, type, n1, sizeof (n1)); + dtrace_typeinfo_t dtt; + + if (tag != NULL && dt_type_lookup(tag, &dtt) == 0 && + (dtt.dtt_ctfp != ctfp || dtt.dtt_type != type)) { + ctfp = dtt.dtt_ctfp; + type = ctf_type_resolve(ctfp, dtt.dtt_type); + kind = ctf_type_kind(ctfp, type); + } else { + xyerror(D_OP_INCOMPLETE, + "operator %s cannot be applied to a " + "forward declaration: no %s definition " + "is available\n", opstr(op), tag); + } + } + + if (kind != CTF_K_STRUCT && kind != CTF_K_UNION) { + if (op == DT_TOK_PTR) { + xyerror(D_OP_SOU, "operator -> cannot be " + "applied to pointer to type \"%s\"; must " + "be applied to a struct or union pointer\n", + ctf_type_name(ctfp, type, n1, sizeof (n1))); + } else { + xyerror(D_OP_SOU, "operator %s cannot be " + "applied to type \"%s\"; must be applied " + "to a struct or union\n", opstr(op), + ctf_type_name(ctfp, type, n1, sizeof (n1))); + } + } + + if (ctf_member_info(ctfp, type, rp->dn_string, &m) == CTF_ERR) { + xyerror(D_TYPE_MEMBER, + "%s is not a member of %s\n", rp->dn_string, + ctf_type_name(ctfp, type, n1, sizeof (n1))); + } + + type = ctf_type_resolve(ctfp, m.ctm_type); + kind = ctf_type_kind(ctfp, type); + + dt_node_type_assign(dnp, ctfp, m.ctm_type, B_FALSE); + dt_node_attr_assign(dnp, lp->dn_attr); + + if (op == DT_TOK_PTR && (kind != CTF_K_ARRAY || + dt_node_is_string(dnp))) + dnp->dn_flags |= DT_NF_LVALUE; /* see K&R[A7.3.3] */ + + if (op == DT_TOK_DOT && (lp->dn_flags & DT_NF_LVALUE) && + (kind != CTF_K_ARRAY || dt_node_is_string(dnp))) + dnp->dn_flags |= DT_NF_LVALUE; /* see K&R[A7.3.3] */ + + if (lp->dn_flags & DT_NF_WRITABLE) + dnp->dn_flags |= DT_NF_WRITABLE; + + if (uref && (kind == CTF_K_POINTER || + (dnp->dn_flags & DT_NF_REF))) + dnp->dn_flags |= DT_NF_USERLAND; + break; + + case DT_TOK_LBRAC: { + /* + * If op is DT_TOK_LBRAC, we know from the special-case code at + * the top that lp is either a D variable or an aggregation. 
+ */ + dt_node_t *lnp; + + /* + * If the left-hand side is an aggregation, just set dn_aggtup + * to the right-hand side and return the cooked aggregation. + * This transformation is legal since we are just collapsing + * nodes to simplify later processing, and the entire aggtup + * parse subtree is retained for subsequent cooking passes. + */ + if (lp->dn_kind == DT_NODE_AGG) { + if (lp->dn_aggtup != NULL) { + xyerror(D_AGG_MDIM, "improper attempt to " + "reference @%s as a multi-dimensional " + "array\n", lp->dn_ident->di_name); + } + + lp->dn_aggtup = rp; + lp = dt_node_cook(lp, 0); + + dnp->dn_left = dnp->dn_right = NULL; + dt_node_free(dnp); + + return (lp); + } + + assert(lp->dn_kind == DT_NODE_VAR); + idp = lp->dn_ident; + + /* + * If the left-hand side is a non-global scalar that hasn't yet + * been referenced or modified, it was just created by self-> + * or this-> and we can convert it from scalar to assoc array. + */ + if (idp->di_kind == DT_IDENT_SCALAR && dt_ident_unref(idp) && + (idp->di_flags & (DT_IDFLG_LOCAL | DT_IDFLG_TLS)) != 0) { + + if (idp->di_flags & DT_IDFLG_LOCAL) { + xyerror(D_ARR_LOCAL, + "local variables may not be used as " + "associative arrays: %s\n", idp->di_name); + } + + dt_dprintf("morph variable %s (id %u) from scalar to " + "array\n", idp->di_name, idp->di_id); + + dt_ident_morph(idp, DT_IDENT_ARRAY, + &dt_idops_assc, NULL); + } + + if (idp->di_kind != DT_IDENT_ARRAY) { + xyerror(D_IDENT_BADREF, "%s '%s' may not be referenced " + "as %s\n", dt_idkind_name(idp->di_kind), + idp->di_name, dt_idkind_name(DT_IDENT_ARRAY)); + } + + /* + * Now that we've confirmed our left-hand side is a DT_NODE_VAR + * of idkind DT_IDENT_ARRAY, we need to splice the [ node from + * the parse tree and leave a cooked DT_NODE_VAR in its place + * where dn_args for the VAR node is the right-hand 'rp' tree, + * as shown in the parse tree diagram below: + * + * / / + * [ OP2 "[" ]=dnp [ VAR ]=dnp + * / \ => | + * / \ +- dn_args -> [ ??? 
]=rp + * [ VAR ]=lp [ ??? ]=rp + * + * Since the final dt_node_cook(dnp) can fail using longjmp we + * must perform the transformations as a group first by over- + * writing 'dnp' to become the VAR node, so that the parse tree + * is guaranteed to be in a consistent state if the cook fails. + */ + assert(lp->dn_kind == DT_NODE_VAR); + assert(lp->dn_args == NULL); + + lnp = dnp->dn_link; + bcopy(lp, dnp, sizeof (dt_node_t)); + dnp->dn_link = lnp; + + dnp->dn_args = rp; + dnp->dn_list = NULL; + + dt_node_free(lp); + return (dt_node_cook(dnp, idflags)); + } + + case DT_TOK_XLATE: { + dt_xlator_t *dxp; + + assert(lp->dn_kind == DT_NODE_TYPE); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + dxp = dt_xlator_lookup(dtp, rp, lp, DT_XLATE_FUZZY); + + if (dxp == NULL) { + xyerror(D_XLATE_NONE, + "cannot translate from \"%s\" to \"%s\"\n", + dt_node_type_name(rp, n1, sizeof (n1)), + dt_node_type_name(lp, n2, sizeof (n2))); + } + + dnp->dn_ident = dt_xlator_ident(dxp, lp->dn_ctfp, lp->dn_type); + dt_node_type_assign(dnp, DT_DYN_CTFP(dtp), DT_DYN_TYPE(dtp), + B_FALSE); + dt_node_attr_assign(dnp, + dt_attr_min(rp->dn_attr, dnp->dn_ident->di_attr)); + break; + } + + case DT_TOK_LPAR: { + ctf_id_t ltype, rtype; + uint_t lkind, rkind; + + assert(lp->dn_kind == DT_NODE_TYPE); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + ltype = ctf_type_resolve(lp->dn_ctfp, lp->dn_type); + lkind = ctf_type_kind(lp->dn_ctfp, ltype); + + rtype = ctf_type_resolve(rp->dn_ctfp, rp->dn_type); + rkind = ctf_type_kind(rp->dn_ctfp, rtype); + + /* + * The rules for casting are loosely explained in K&R[A7.5] + * and K&R[A6]. Basically, we can cast to the same type or + * same base type, between any kind of scalar values, from + * arrays to pointers, and we can cast anything to void. + * To these rules D adds casts from scalars to strings. 
+ */ + if (ctf_type_compat(lp->dn_ctfp, lp->dn_type, + rp->dn_ctfp, rp->dn_type)) + /*EMPTY*/; + else if (dt_node_is_scalar(lp) && + (dt_node_is_scalar(rp) || rkind == CTF_K_FUNCTION)) + /*EMPTY*/; + else if (dt_node_is_void(lp)) + /*EMPTY*/; + else if (lkind == CTF_K_POINTER && dt_node_is_pointer(rp)) + /*EMPTY*/; + else if (dt_node_is_string(lp) && (dt_node_is_scalar(rp) || + dt_node_is_pointer(rp) || dt_node_is_strcompat(rp))) + /*EMPTY*/; + else { + xyerror(D_CAST_INVAL, + "invalid cast expression: \"%s\" to \"%s\"\n", + dt_node_type_name(rp, n1, sizeof (n1)), + dt_node_type_name(lp, n2, sizeof (n2))); + } + + dt_node_type_propagate(lp, dnp); /* see K&R[A7.5] */ + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + + /* + * If it's a pointer then should be able to (attempt to) + * assign to it. + */ + if (lkind == CTF_K_POINTER) + dnp->dn_flags |= DT_NF_WRITABLE; + + break; + } + + case DT_TOK_COMMA: + lp = dnp->dn_left = dt_node_cook(lp, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(rp, DT_IDFLG_REF); + + if (dt_node_is_dynamic(lp) || dt_node_is_dynamic(rp)) { + xyerror(D_OP_DYN, "operator %s operands " + "cannot be of dynamic type\n", opstr(op)); + } + + if (dt_node_is_actfunc(lp) || dt_node_is_actfunc(rp)) { + xyerror(D_OP_ACT, "operator %s operands " + "cannot be actions\n", opstr(op)); + } + + dt_node_type_propagate(rp, dnp); /* see K&R[A7.18] */ + dt_node_attr_assign(dnp, dt_attr_min(lp->dn_attr, rp->dn_attr)); + break; + + default: + xyerror(D_UNKNOWN, "invalid binary op %s\n", opstr(op)); + } + + /* + * Complete the conversion of E1[E2] to *((E1)+(E2)) that we started + * at the top of our switch() above (see K&R[A7.3.1]). Since E2 is + * parsed as an argument_expression_list by dt_grammar.y, we can + * end up with a comma-separated list inside of a non-associative + * array reference. We check for this and report an appropriate error. 
+ */ + if (dnp->dn_op == DT_TOK_LBRAC && op == DT_TOK_ADD) { + dt_node_t *pnp; + + if (rp->dn_list != NULL) { + xyerror(D_ARR_BADREF, + "cannot access %s as an associative array\n", + dt_node_name(lp, n1, sizeof (n1))); + } + + dnp->dn_op = DT_TOK_ADD; + pnp = dt_node_op1(DT_TOK_DEREF, dnp); + + /* + * Cook callbacks are not typically permitted to allocate nodes. + * When we do, we must insert them in the middle of an existing + * allocation list rather than having them appended to the pcb + * list because the sub-expression may be part of a definition. + */ + assert(yypcb->pcb_list == pnp); + yypcb->pcb_list = pnp->dn_link; + + pnp->dn_link = dnp->dn_link; + dnp->dn_link = pnp; + + return (dt_node_cook(pnp, DT_IDFLG_REF)); + } + + return (dnp); +} + +/* + * Cook a ternary (?:) operator node. The condition and both operands are + * cooked as references; the condition must be scalar and neither operand may + * be of dynamic type or an action function. The node is assigned the result + * type selected below and the minimum of the attributes of the condition and + * the two operands. Any failed check is reported with xyerror(). The idflags + * argument is not used. + */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_op3(dt_node_t *dnp, uint_t idflags) +{ + dt_node_t *lp, *rp; + ctf_file_t *ctfp; + ctf_id_t type; + + dnp->dn_expr = dt_node_cook(dnp->dn_expr, DT_IDFLG_REF); + lp = dnp->dn_left = dt_node_cook(dnp->dn_left, DT_IDFLG_REF); + rp = dnp->dn_right = dt_node_cook(dnp->dn_right, DT_IDFLG_REF); + + if (!dt_node_is_scalar(dnp->dn_expr)) { + xyerror(D_OP_SCALAR, + "operator ?: expression must be of scalar type\n"); + } + + if (dt_node_is_dynamic(lp) || dt_node_is_dynamic(rp)) { + xyerror(D_OP_DYN, + "operator ?: operands cannot be of dynamic type\n"); + } + + /* + * The rules for type checking for the ternary operator are complex and + * are described in the ANSI-C spec (see K&R[A7.16]). We implement + * the various tests in order from least to most expensive.
+ */ + if (ctf_type_compat(lp->dn_ctfp, lp->dn_type, + rp->dn_ctfp, rp->dn_type)) { + ctfp = lp->dn_ctfp; + type = lp->dn_type; + } else if (dt_node_is_integer(lp) && dt_node_is_integer(rp)) { + dt_type_promote(lp, rp, &ctfp, &type); + } else if (dt_node_is_strcompat(lp) && dt_node_is_strcompat(rp) && + (dt_node_is_string(lp) || dt_node_is_string(rp))) { + ctfp = DT_STR_CTFP(yypcb->pcb_hdl); + type = DT_STR_TYPE(yypcb->pcb_hdl); + } else if (dt_node_is_ptrcompat(lp, rp, &ctfp, &type) == 0) { + xyerror(D_OP_INCOMPAT, + "operator ?: operands must have compatible types\n"); + } + + if (dt_node_is_actfunc(lp) || dt_node_is_actfunc(rp)) { + xyerror(D_OP_ACT, "action cannot be " + "used in a conditional context\n"); + } + + dt_node_type_assign(dnp, ctfp, type, B_FALSE); + dt_node_attr_assign(dnp, dt_attr_min(dnp->dn_expr->dn_attr, + dt_attr_min(lp->dn_attr, rp->dn_attr))); + + return (dnp); +} + +/* + * Cook a statement node by cooking its expression with the caller-supplied + * idflags and copying the attributes of the cooked expression to the statement. + * dt_cook_funcs[] uses this routine for both DT_NODE_DEXPR and DT_NODE_DFUNC. + */ +static dt_node_t * +dt_cook_statement(dt_node_t *dnp, uint_t idflags) +{ + dnp->dn_expr = dt_node_cook(dnp->dn_expr, idflags); + dt_node_attr_assign(dnp, dnp->dn_expr->dn_attr); + + return (dnp); +} + +/* + * If dn_aggfun is set, this node is a collapsed aggregation assignment (see + * the special case code for DT_TOK_ASGN in dt_cook_op2() above), in which + * case we cook both the tuple and the function call. If dn_aggfun is NULL, + * this node is just a reference to the aggregation's type and attributes.
+ */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_aggregation(dt_node_t *dnp, uint_t idflags) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + + if (dnp->dn_aggfun != NULL) { + /* + * Assignment form: cook the aggregating function node, then + * take the node attributes from dt_ident_cook(), which is + * passed the aggregation identifier and the tuple list. + */ + dnp->dn_aggfun = dt_node_cook(dnp->dn_aggfun, DT_IDFLG_REF); + dt_node_attr_assign(dnp, dt_ident_cook(dnp, + dnp->dn_ident, &dnp->dn_aggtup)); + } else { + /* + * Reference form: assign the DT_DYN_* type of this handle + * and the attributes recorded on the identifier. + */ + dt_node_type_assign(dnp, DT_DYN_CTFP(dtp), DT_DYN_TYPE(dtp), + B_FALSE); + dt_node_attr_assign(dnp, dnp->dn_ident->di_attr); + } + + return (dnp); +} + +/* + * Since D permits new variable identifiers to be instantiated in any program + * expression, we may need to cook a clause's predicate either before or after + * the action list depending on the program code in question. Consider: + * + * probe-description-list probe-description-list + * /x++/ /x == 0/ + * { { + * trace(x); trace(x++); + * } } + * + * In the left-hand example, the predicate uses operator ++ to instantiate 'x' + * as a variable of type int64_t. The predicate must be cooked first because + * otherwise the statement trace(x) refers to an unknown identifier. In the + * right-hand example, the action list uses ++ to instantiate 'x'; the action + * list must be cooked first because otherwise the predicate x == 0 refers to + * an unknown identifier. In order to simplify programming, we support both. + * + * When cooking a clause, we cook the action statements before the predicate by + * default, since it seems more common to create or modify identifiers in the + * action list. If cooking fails due to an unknown identifier, we attempt to + * cook the predicate (i.e. do it first) and then go back and cook the actions. + * If this, too, fails (or if we get an error other than D_IDENT_UNDEF or + * D_VAR_UNDEF) we give up and report failure back to the user.
There are five possible paths: + * + * cook actions = OK, cook predicate = OK -> OK + * cook actions = OK, cook predicate = ERR -> ERR + * cook actions = ERR, cook predicate = ERR -> ERR + * cook actions = ERR, cook predicate = OK, cook actions = OK -> OK + * cook actions = ERR, cook predicate = OK, cook actions = ERR -> ERR + * + * The programmer can still defeat our scheme by creating circular definition + * dependencies between predicates and actions, as in this example clause: + * + * probe-description-list + * /x++ && y == 0/ + * { + * trace(x + y++); + * } + * + * but it doesn't seem worth the complexity to handle such rare cases. The + * user can simply use the D variable declaration syntax to work around them. + */ +static dt_node_t * +dt_cook_clause(dt_node_t *dnp, uint_t idflags) +{ + volatile int err, tries; + jmp_buf ojb; + + /* + * Before assigning dn_ctxattr, temporarily assign the probe attribute + * to 'dnp' itself to force an attribute check and minimum violation. + */ + dt_node_attr_assign(dnp, yypcb->pcb_pinfo.dtp_attr); + dnp->dn_ctxattr = yypcb->pcb_pinfo.dtp_attr; + + bcopy(yypcb->pcb_jmpbuf, ojb, sizeof (jmp_buf)); + tries = 0; + + /* + * The setjmp() below is only armed when a predicate exists. If the + * action list cooked under it fails, control returns here with err + * set; the saved jump buffer is restored and the error is rethrown + * unless this is the first failure and it is an EDT_COMPILER error + * tagged D_IDENT_UNDEF or D_VAR_UNDEF, in which case the predicate + * is cooked first and the action list is retried after it. + */ + if (dnp->dn_pred != NULL && (err = setjmp(yypcb->pcb_jmpbuf)) != 0) { + bcopy(ojb, yypcb->pcb_jmpbuf, sizeof (jmp_buf)); + if (tries++ != 0 || err != EDT_COMPILER || ( + yypcb->pcb_hdl->dt_errtag != dt_errtag(D_IDENT_UNDEF) && + yypcb->pcb_hdl->dt_errtag != dt_errtag(D_VAR_UNDEF))) + longjmp(yypcb->pcb_jmpbuf, err); + } + + if (tries == 0) { + yylabel("action list"); + + dt_node_attr_assign(dnp, + dt_node_list_cook(&dnp->dn_acts, idflags)); + + bcopy(ojb, yypcb->pcb_jmpbuf, sizeof (jmp_buf)); + yylabel(NULL); + } + + if (dnp->dn_pred != NULL) { + yylabel("predicate"); + + dnp->dn_pred = dt_node_cook(dnp->dn_pred, idflags); + dt_node_attr_assign(dnp, + dt_attr_min(dnp->dn_attr, dnp->dn_pred->dn_attr)); + + if (!dt_node_is_scalar(dnp->dn_pred)) { + xyerror(D_PRED_SCALAR, + "predicate result must be of scalar
type\n"); + } + + yylabel(NULL); + } + + /* + * Second attempt at the action list, made after the predicate was + * cooked because the first attempt failed. + */ + if (tries != 0) { + yylabel("action list"); + + dt_node_attr_assign(dnp, + dt_node_list_cook(&dnp->dn_acts, idflags)); + + yylabel(NULL); + } + + return (dnp); +} + +/* + * Cook an inline definition node by checking that the declared type of the + * inline is compatible with the type of its already-cooked root expression. + * A mismatch is reported with dnerror() as D_OP_INCOMPAT. The idflags + * argument is not used. + */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_inline(dt_node_t *dnp, uint_t idflags) +{ + dt_idnode_t *inp = dnp->dn_ident->di_iarg; + dt_ident_t *rdp; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + assert(dnp->dn_ident->di_flags & DT_IDFLG_INLINE); + assert(inp->din_root->dn_flags & DT_NF_COOKED); + + /* + * If we are inlining a translation, verify that the inline declaration + * type exactly matches the type that is returned by the translation. + * Otherwise just use dt_node_is_argcompat() to check the types. + */ + if ((rdp = dt_node_resolve(inp->din_root, DT_IDENT_XLSOU)) != NULL || + (rdp = dt_node_resolve(inp->din_root, DT_IDENT_XLPTR)) != NULL) { + + ctf_file_t *lctfp = dnp->dn_ctfp; + ctf_id_t ltype = ctf_type_resolve(lctfp, dnp->dn_type); + + dt_xlator_t *dxp = rdp->di_data; + ctf_file_t *rctfp = dxp->dx_dst_ctfp; + ctf_id_t rtype = dxp->dx_dst_base; + + /* + * If the inline is declared as a pointer, compare the type it + * points to against the translator dx_dst_base type instead. + */ + if (ctf_type_kind(lctfp, ltype) == CTF_K_POINTER) { + ltype = ctf_type_reference(lctfp, ltype); + ltype = ctf_type_resolve(lctfp, ltype); + } + + if (ctf_type_compat(lctfp, ltype, rctfp, rtype) == 0) { + dnerror(dnp, D_OP_INCOMPAT, + "inline %s definition uses incompatible types: " + "\"%s\" = \"%s\"\n", dnp->dn_ident->di_name, + dt_type_name(lctfp, ltype, n1, sizeof (n1)), + dt_type_name(rctfp, rtype, n2, sizeof (n2))); + } + + } else if (dt_node_is_argcompat(dnp, inp->din_root) == 0) { + dnerror(dnp, D_OP_INCOMPAT, + "inline %s definition uses incompatible types: " + "\"%s\" = \"%s\"\n", dnp->dn_ident->di_name, + dt_node_type_name(dnp, n1, sizeof (n1)), + dt_node_type_name(inp->din_root, n2, sizeof (n2))); + } + + return (dnp); +} + +/* + * Cook a member node by cooking its member expression with the caller-supplied + * idflags and copying the attributes of the cooked expression to the member. + */ +static dt_node_t * +dt_cook_member(dt_node_t *dnp, uint_t idflags) +{ + dnp->dn_membexpr = dt_node_cook(dnp->dn_membexpr, idflags); + dt_node_attr_assign(dnp,
dnp->dn_membexpr->dn_attr); + return (dnp); +} + +/* + * Cook a translator definition. Each member named in the translator must be + * a member of the destination type; the member is cooked, given the type of + * the corresponding destination member, and its expression must be argument- + * compatible with that type. The minimum attributes over all members are + * stored in both translator identifiers (dx_souid and dx_ptrid). The idflags + * argument is not used. + */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_xlator(dt_node_t *dnp, uint_t idflags) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_xlator_t *dxp = dnp->dn_xlator; + dt_node_t *mnp; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + dtrace_attribute_t attr = _dtrace_maxattr; + ctf_membinfo_t ctm; + + /* + * Before cooking each translator member, we push a reference to the + * hash containing translator-local identifiers on to pcb_globals to + * temporarily interpose these identifiers in front of other globals. + */ + dt_idstack_push(&yypcb->pcb_globals, dxp->dx_locals); + + for (mnp = dnp->dn_members; mnp != NULL; mnp = mnp->dn_list) { + if (ctf_member_info(dxp->dx_dst_ctfp, dxp->dx_dst_type, + mnp->dn_membname, &ctm) == CTF_ERR) { + xyerror(D_XLATE_MEMB, + "translator member %s is not a member of %s\n", + mnp->dn_membname, ctf_type_name(dxp->dx_dst_ctfp, + dxp->dx_dst_type, n1, sizeof (n1))); + } + + (void) dt_node_cook(mnp, DT_IDFLG_REF); + dt_node_type_assign(mnp, dxp->dx_dst_ctfp, ctm.ctm_type, + B_FALSE); + attr = dt_attr_min(attr, mnp->dn_attr); + + if (dt_node_is_argcompat(mnp, mnp->dn_membexpr) == 0) { + xyerror(D_XLATE_INCOMPAT, + "translator member %s definition uses " + "incompatible types: \"%s\" = \"%s\"\n", + mnp->dn_membname, + dt_node_type_name(mnp, n1, sizeof (n1)), + dt_node_type_name(mnp->dn_membexpr, + n2, sizeof (n2))); + } + } + + dt_idstack_pop(&yypcb->pcb_globals, dxp->dx_locals); + + dxp->dx_souid.di_attr = attr; + dxp->dx_ptrid.di_attr = attr; + + dt_node_type_assign(dnp, DT_DYN_CTFP(dtp), DT_DYN_TYPE(dtp), B_FALSE); + dt_node_attr_assign(dnp, _dtrace_defattr); + + return (dnp); +} + +/* + * Compare two probe argument lists. The kind string names the list in the + * error messages. A difference in argument count, or in the type of any + * argument as determined by ctf_type_cmp(), is reported with dnerror() as + * D_PROV_INCOMPAT. + */ +static void +dt_node_provider_cmp_argv(dt_provider_t *pvp, dt_node_t *pnp, const char *kind, + uint_t old_argc, dt_node_t *old_argv, uint_t new_argc, dt_node_t *new_argv) +{ + dt_probe_t *prp = pnp->dn_ident->di_data; + uint_t i; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + if (old_argc
!= new_argc) { + dnerror(pnp, D_PROV_INCOMPAT, + "probe %s:%s %s prototype mismatch:\n" + "\t current: %u arg%s\n\tprevious: %u arg%s\n", + pvp->pv_desc.dtvd_name, prp->pr_ident->di_name, kind, + new_argc, new_argc != 1 ? "s" : "", + old_argc, old_argc != 1 ? "s" : ""); + } + + /* + * dnerror() does not return (it ends in longjmp), so the two counts + * are equal here and both lists can be walked in lock step. + */ + for (i = 0; i < old_argc; i++, + old_argv = old_argv->dn_list, new_argv = new_argv->dn_list) { + if (ctf_type_cmp(old_argv->dn_ctfp, old_argv->dn_type, + new_argv->dn_ctfp, new_argv->dn_type) == 0) + continue; + + dnerror(pnp, D_PROV_INCOMPAT, + "probe %s:%s %s prototype argument #%u mismatch:\n" + "\t current: %s\n\tprevious: %s\n", + pvp->pv_desc.dtvd_name, prp->pr_ident->di_name, kind, i + 1, + dt_node_type_name(new_argv, n1, sizeof (n1)), + dt_node_type_name(old_argv, n2, sizeof (n2))); + } +} + +/* + * Compare a new probe declaration with an existing probe definition (either + * from a previous declaration or cached from the kernel). If the existing + * definition and declaration both have an input and output parameter list, + * compare both lists. Otherwise compare only the output parameter lists.
+ */ +static void +dt_node_provider_cmp(dt_provider_t *pvp, dt_node_t *pnp, + dt_probe_t *old, dt_probe_t *new) +{ + dt_node_provider_cmp_argv(pvp, pnp, "output", + old->pr_xargc, old->pr_xargs, new->pr_xargc, new->pr_xargs); + + if (old->pr_nargs != old->pr_xargs && new->pr_nargs != new->pr_xargs) { + dt_node_provider_cmp_argv(pvp, pnp, "input", + old->pr_nargc, old->pr_nargs, new->pr_nargc, new->pr_nargs); + } + + /* + * The existing probe shares one list for pr_nargs and pr_xargs but the + * new declaration has separate lists: replace the existing probe with + * the new one, unless the provider is flagged DT_PROVIDER_IMPL, in + * which case the difference is reported as an error. + */ + if (old->pr_nargs == old->pr_xargs && new->pr_nargs != new->pr_xargs) { + if (pvp->pv_flags & DT_PROVIDER_IMPL) { + dnerror(pnp, D_PROV_INCOMPAT, + "provider interface mismatch: %s\n" + "\t current: probe %s:%s has an output prototype\n" + "\tprevious: probe %s:%s has no output prototype\n", + pvp->pv_desc.dtvd_name, pvp->pv_desc.dtvd_name, + new->pr_ident->di_name, pvp->pv_desc.dtvd_name, + old->pr_ident->di_name); + } + + if (old->pr_ident->di_gen == yypcb->pcb_hdl->dt_gen) + old->pr_ident->di_flags |= DT_IDFLG_ORPHAN; + + dt_idhash_delete(pvp->pv_probes, old->pr_ident); + dt_probe_declare(pvp, new); + } +} + +/* + * Check each output argument of a probe whose pr_nargs and pr_xargs lists + * differ. For every output argument either a translator from the mapped + * input argument must be found, in which case it is recorded with + * dt_provider_xref(), or the two argument types must be argument-compatible; + * otherwise D_PROV_PRXLATOR is reported with dnerror(). + */ +static void +dt_cook_probe(dt_node_t *dnp, dt_provider_t *pvp) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_probe_t *prp = dnp->dn_ident->di_data; + + dt_xlator_t *dxp; + uint_t i; + + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + if (prp->pr_nargs == prp->pr_xargs) + return; + + for (i = 0; i < prp->pr_xargc; i++) { + dt_node_t *xnp = prp->pr_xargv[i]; + dt_node_t *nnp = prp->pr_nargv[prp->pr_mapping[i]]; + + if ((dxp = dt_xlator_lookup(dtp, + nnp, xnp, DT_XLATE_FUZZY)) != NULL) { + if (dt_provider_xref(dtp, pvp, dxp->dx_id) != 0) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + continue; + } + + if (dt_node_is_argcompat(nnp, xnp)) + continue; /* no translator defined and none required */ + + dnerror(dnp, D_PROV_PRXLATOR, "translator for %s:%s output " + "argument #%u from %s to %s is not defined\n", + pvp->pv_desc.dtvd_name, dnp->dn_ident->di_name, i + 1, + dt_node_type_name(nnp, n1, sizeof (n1)), + dt_node_type_name(xnp, n2, sizeof
(n2))); + } +} + +/* + * Cook a provider declaration by processing each probe node on dn_probes and + * then checking its argument translators with dt_cook_probe(). The idflags + * argument is not used. + */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_provider(dt_node_t *dnp, uint_t idflags) +{ + dt_provider_t *pvp = dnp->dn_provider; + dt_node_t *pnp; + + /* + * If we're declaring a provider for the first time and it is unknown + * to dtrace(7D), insert the probe definitions into the provider's hash. + * If we're redeclaring a known provider, verify the interface matches. + */ + for (pnp = dnp->dn_probes; pnp != NULL; pnp = pnp->dn_list) { + const char *probename = pnp->dn_ident->di_name; + dt_probe_t *prp = dt_probe_lookup(pvp, probename); + + assert(pnp->dn_kind == DT_NODE_PROBE); + + if (prp != NULL && dnp->dn_provred) { + dt_node_provider_cmp(pvp, pnp, + prp, pnp->dn_ident->di_data); + } else if (prp == NULL && dnp->dn_provred) { + dnerror(pnp, D_PROV_INCOMPAT, + "provider interface mismatch: %s\n" + "\t current: probe %s:%s defined\n" + "\tprevious: probe %s:%s not defined\n", + dnp->dn_provname, dnp->dn_provname, + probename, dnp->dn_provname, probename); + } else if (prp != NULL) { + dnerror(pnp, D_PROV_PRDUP, "probe redeclared: %s:%s\n", + dnp->dn_provname, probename); + } else + dt_probe_declare(pvp, pnp->dn_ident->di_data); + + dt_cook_probe(pnp, pvp); + } + + return (dnp); +} + +/* + * Cook routine for node kinds that need no processing: return the node as is. + */ +/*ARGSUSED*/ +static dt_node_t * +dt_cook_none(dt_node_t *dnp, uint_t idflags) +{ + return (dnp); +} + +/* + * Table of cook routines indexed by node kind (dn_kind); dt_node_cook() + * dispatches through it. + */ +static dt_node_t *(*dt_cook_funcs[])(dt_node_t *, uint_t) = { + dt_cook_none, /* DT_NODE_FREE */ + dt_cook_none, /* DT_NODE_INT */ + dt_cook_none, /* DT_NODE_STRING */ + dt_cook_ident, /* DT_NODE_IDENT */ + dt_cook_var, /* DT_NODE_VAR */ + dt_cook_none, /* DT_NODE_SYM */ + dt_cook_none, /* DT_NODE_TYPE */ + dt_cook_func, /* DT_NODE_FUNC */ + dt_cook_op1, /* DT_NODE_OP1 */ + dt_cook_op2, /* DT_NODE_OP2 */ + dt_cook_op3, /* DT_NODE_OP3 */ + dt_cook_statement, /* DT_NODE_DEXPR */ + dt_cook_statement, /* DT_NODE_DFUNC */ + dt_cook_aggregation, /* DT_NODE_AGG */ + dt_cook_none, /* DT_NODE_PDESC */ + dt_cook_clause, /* DT_NODE_CLAUSE */ + dt_cook_inline, /*
DT_NODE_INLINE */ + dt_cook_member, /* DT_NODE_MEMBER */ + dt_cook_xlator, /* DT_NODE_XLATOR */ + dt_cook_none, /* DT_NODE_PROBE */ + dt_cook_provider, /* DT_NODE_PROVIDER */ + dt_cook_none /* DT_NODE_PROG */ +}; + +/* + * Recursively cook the parse tree starting at the specified node. The idflags + * parameter is used to indicate the type of reference (r/w) and is applied to + * the resulting identifier if it is a D variable or D aggregation. + */ +dt_node_t * +dt_node_cook(dt_node_t *dnp, uint_t idflags) +{ + int oldlineno = yylineno; + + /* + * Point yylineno at the line of this node while it is cooked; the + * previous value is restored on the normal return path below. + */ + yylineno = dnp->dn_line; + + dnp = dt_cook_funcs[dnp->dn_kind](dnp, idflags); + dnp->dn_flags |= DT_NF_COOKED; + + if (dnp->dn_kind == DT_NODE_VAR || dnp->dn_kind == DT_NODE_AGG) + dnp->dn_ident->di_flags |= idflags; + + yylineno = oldlineno; + return (dnp); +} + +/* + * Cook each node on the dn_list-linked list at *pnp, storing each node that + * dt_node_cook() returns back into the list in place of the node passed in. + * Returns the minimum of _dtrace_defattr and the attributes of all cooked + * nodes. A NULL pnp is treated as an empty list. + */ +dtrace_attribute_t +dt_node_list_cook(dt_node_t **pnp, uint_t idflags) +{ + dtrace_attribute_t attr = _dtrace_defattr; + dt_node_t *dnp, *nnp; + + for (dnp = (pnp != NULL ? *pnp : NULL); dnp != NULL; dnp = nnp) { + nnp = dnp->dn_list; + dnp = *pnp = dt_node_cook(dnp, idflags); + attr = dt_attr_min(attr, dnp->dn_attr); + dnp->dn_list = nnp; + pnp = &dnp->dn_list; + } + + return (attr); +} + +/* + * Call dt_node_free() on every node of the dn_list-linked list at *pnp and + * then set *pnp to NULL. A NULL pnp is a no-op. + */ +void +dt_node_list_free(dt_node_t **pnp) +{ + dt_node_t *dnp, *nnp; + + for (dnp = (pnp != NULL ? *pnp : NULL); dnp != NULL; dnp = nnp) { + nnp = dnp->dn_list; + dt_node_free(dnp); + } + + if (pnp != NULL) + *pnp = NULL; +} + +/* + * Release every node on the dn_link-linked list at *pnp in two passes: the + * first calls dt_node_free() on each node and the second calls free() on each + * node. *pnp is then set to NULL. A NULL pnp is a no-op. + * NOTE(review): the second pass still follows dn_link, so dt_node_free() + * presumably leaves the node memory and dn_link intact -- confirm. + */ +void +dt_node_link_free(dt_node_t **pnp) +{ + dt_node_t *dnp, *nnp; + + for (dnp = (pnp != NULL ? *pnp : NULL); dnp != NULL; dnp = nnp) { + nnp = dnp->dn_link; + dt_node_free(dnp); + } + + for (dnp = (pnp != NULL ?
*pnp : NULL); dnp != NULL; dnp = nnp) { + nnp = dnp->dn_link; + free(dnp); + } + + if (pnp != NULL) + *pnp = NULL; +} + +/* + * Append list rp to the end of the dn_list-linked list lp and return the head + * of the combined list. If either argument is NULL the other is returned. + */ +dt_node_t * +dt_node_link(dt_node_t *lp, dt_node_t *rp) +{ + dt_node_t *dnp; + + if (lp == NULL) + return (rp); + else if (rp == NULL) + return (lp); + + for (dnp = lp; dnp->dn_list != NULL; dnp = dnp->dn_list) + continue; + + dnp->dn_list = rp; + return (lp); +} + +/* + * Compute the DOF dtrace_diftype_t representation of a node's type. This is + * called from a variety of places in the library so it cannot assume yypcb + * is valid: any references to handle-specific data must be made through 'dtp'. + */ +void +dt_node_diftype(dtrace_hdl_t *dtp, const dt_node_t *dnp, dtrace_diftype_t *tp) +{ + if (dnp->dn_ctfp == DT_STR_CTFP(dtp) && + dnp->dn_type == DT_STR_TYPE(dtp)) { + tp->dtdt_kind = DIF_TYPE_STRING; + tp->dtdt_ckind = CTF_K_UNKNOWN; + } else { + tp->dtdt_kind = DIF_TYPE_CTF; + tp->dtdt_ckind = ctf_type_kind(dnp->dn_ctfp, + ctf_type_resolve(dnp->dn_ctfp, dnp->dn_type)); + } + + tp->dtdt_flags = (dnp->dn_flags & DT_NF_REF) ? + (dnp->dn_flags & DT_NF_USERLAND) ?
DIF_TF_BYUREF : + DIF_TF_BYREF : 0; + tp->dtdt_pad = 0; + tp->dtdt_size = ctf_type_size(dnp->dn_ctfp, dnp->dn_type); +} + +/* + * Print a textual dump of the parse tree rooted at dnp to fp, indenting each + * line by two spaces per level of depth. Child nodes are printed by recursive + * calls with depth + 1. + */ +void +dt_node_printr(dt_node_t *dnp, FILE *fp, int depth) +{ + char n[DT_TYPE_NAMELEN], buf[BUFSIZ], a[8]; + const dtrace_syminfo_t *dts; + const dt_idnode_t *inp; + dt_node_t *arg; + + (void) fprintf(fp, "%*s", depth * 2, ""); + (void) dt_attr_str(dnp->dn_attr, a, sizeof (a)); + + if (dnp->dn_ctfp != NULL && dnp->dn_type != CTF_ERR && + ctf_type_name(dnp->dn_ctfp, dnp->dn_type, n, sizeof (n)) != NULL) { + (void) snprintf(buf, BUFSIZ, "type=<%s> attr=%s flags=", n, a); + } else { + (void) snprintf(buf, BUFSIZ, "type=<%ld> attr=%s flags=", + dnp->dn_type, a); + } + + /* + * The type name has been copied into buf, so n is reused here as + * scratch space for the comma-separated flag names. + * NOTE(review): n + 1 skips the leading comma; this relies on at least + * one of the flags tested below being set whenever dn_flags is nonzero + * -- confirm against the DT_NF_* definitions. + */ + if (dnp->dn_flags != 0) { + n[0] = '\0'; + if (dnp->dn_flags & DT_NF_SIGNED) + (void) strcat(n, ",SIGN"); + if (dnp->dn_flags & DT_NF_COOKED) + (void) strcat(n, ",COOK"); + if (dnp->dn_flags & DT_NF_REF) + (void) strcat(n, ",REF"); + if (dnp->dn_flags & DT_NF_LVALUE) + (void) strcat(n, ",LVAL"); + if (dnp->dn_flags & DT_NF_WRITABLE) + (void) strcat(n, ",WRITE"); + if (dnp->dn_flags & DT_NF_BITFIELD) + (void) strcat(n, ",BITF"); + if (dnp->dn_flags & DT_NF_USERLAND) + (void) strcat(n, ",USER"); + (void) strcat(buf, n + 1); + } else + (void) strcat(buf, "0"); + + switch (dnp->dn_kind) { + case DT_NODE_FREE: + (void) fprintf(fp, "FREE <node %p>\n", (void *)dnp); + break; + + case DT_NODE_INT: + (void) fprintf(fp, "INT 0x%llx (%s)\n", + (u_longlong_t)dnp->dn_value, buf); + break; + + case DT_NODE_STRING: + (void) fprintf(fp, "STRING \"%s\" (%s)\n", dnp->dn_string, buf); + break; + + case DT_NODE_IDENT: + (void) fprintf(fp, "IDENT %s (%s)\n", dnp->dn_string, buf); + break; + + case DT_NODE_VAR: + (void) fprintf(fp, "VARIABLE %s%s (%s)\n", + (dnp->dn_ident->di_flags & DT_IDFLG_LOCAL) ? "this->" : + (dnp->dn_ident->di_flags & DT_IDFLG_TLS) ?
"self->" : "", + dnp->dn_ident->di_name, buf); + + if (dnp->dn_args != NULL) + (void) fprintf(fp, "%*s[\n", depth * 2, ""); + + for (arg = dnp->dn_args; arg != NULL; arg = arg->dn_list) { + dt_node_printr(arg, fp, depth + 1); + if (arg->dn_list != NULL) + (void) fprintf(fp, "%*s,\n", depth * 2, ""); + } + + if (dnp->dn_args != NULL) + (void) fprintf(fp, "%*s]\n", depth * 2, ""); + break; + + case DT_NODE_SYM: + dts = dnp->dn_ident->di_data; + (void) fprintf(fp, "SYMBOL %s`%s (%s)\n", + dts->dts_object, dts->dts_name, buf); + break; + + case DT_NODE_TYPE: + if (dnp->dn_string != NULL) { + (void) fprintf(fp, "TYPE (%s) %s\n", + buf, dnp->dn_string); + } else + (void) fprintf(fp, "TYPE (%s)\n", buf); + break; + + case DT_NODE_FUNC: + (void) fprintf(fp, "FUNC %s (%s)\n", + dnp->dn_ident->di_name, buf); + + for (arg = dnp->dn_args; arg != NULL; arg = arg->dn_list) { + dt_node_printr(arg, fp, depth + 1); + if (arg->dn_list != NULL) + (void) fprintf(fp, "%*s,\n", depth * 2, ""); + } + break; + + case DT_NODE_OP1: + (void) fprintf(fp, "OP1 %s (%s)\n", opstr(dnp->dn_op), buf); + dt_node_printr(dnp->dn_child, fp, depth + 1); + break; + + case DT_NODE_OP2: + (void) fprintf(fp, "OP2 %s (%s)\n", opstr(dnp->dn_op), buf); + dt_node_printr(dnp->dn_left, fp, depth + 1); + dt_node_printr(dnp->dn_right, fp, depth + 1); + break; + + case DT_NODE_OP3: + (void) fprintf(fp, "OP3 (%s)\n", buf); + dt_node_printr(dnp->dn_expr, fp, depth + 1); + (void) fprintf(fp, "%*s?\n", depth * 2, ""); + dt_node_printr(dnp->dn_left, fp, depth + 1); + (void) fprintf(fp, "%*s:\n", depth * 2, ""); + dt_node_printr(dnp->dn_right, fp, depth + 1); + break; + + case DT_NODE_DEXPR: + case DT_NODE_DFUNC: + (void) fprintf(fp, "D EXPRESSION attr=%s\n", a); + dt_node_printr(dnp->dn_expr, fp, depth + 1); + break; + + case DT_NODE_AGG: + (void) fprintf(fp, "AGGREGATE @%s attr=%s [\n", + dnp->dn_ident->di_name, a); + + for (arg = dnp->dn_aggtup; arg != NULL; arg = arg->dn_list) { + dt_node_printr(arg, fp, depth + 1); +
if (arg->dn_list != NULL) + (void) fprintf(fp, "%*s,\n", depth * 2, ""); + } + + if (dnp->dn_aggfun) { + (void) fprintf(fp, "%*s] = ", depth * 2, ""); + dt_node_printr(dnp->dn_aggfun, fp, depth + 1); + } else + (void) fprintf(fp, "%*s]\n", depth * 2, ""); + + if (dnp->dn_aggfun) + (void) fprintf(fp, "%*s)\n", depth * 2, ""); + break; + + case DT_NODE_PDESC: + (void) fprintf(fp, "PDESC %s:%s:%s:%s [%u]\n", + dnp->dn_desc->dtpd_provider, dnp->dn_desc->dtpd_mod, + dnp->dn_desc->dtpd_func, dnp->dn_desc->dtpd_name, + dnp->dn_desc->dtpd_id); + break; + + case DT_NODE_CLAUSE: + (void) fprintf(fp, "CLAUSE attr=%s\n", a); + + for (arg = dnp->dn_pdescs; arg != NULL; arg = arg->dn_list) + dt_node_printr(arg, fp, depth + 1); + + (void) fprintf(fp, "%*sCTXATTR %s\n", depth * 2, "", + dt_attr_str(dnp->dn_ctxattr, a, sizeof (a))); + + if (dnp->dn_pred != NULL) { + (void) fprintf(fp, "%*sPREDICATE /\n", depth * 2, ""); + dt_node_printr(dnp->dn_pred, fp, depth + 1); + (void) fprintf(fp, "%*s/\n", depth * 2, ""); + } + + for (arg = dnp->dn_acts; arg != NULL; arg = arg->dn_list) + dt_node_printr(arg, fp, depth + 1); + break; + + case DT_NODE_INLINE: + inp = dnp->dn_ident->di_iarg; + + (void) fprintf(fp, "INLINE %s (%s)\n", + dnp->dn_ident->di_name, buf); + dt_node_printr(inp->din_root, fp, depth + 1); + break; + + case DT_NODE_MEMBER: + (void) fprintf(fp, "MEMBER %s (%s)\n", dnp->dn_membname, buf); + if (dnp->dn_membexpr) + dt_node_printr(dnp->dn_membexpr, fp, depth + 1); + break; + + case DT_NODE_XLATOR: + (void) fprintf(fp, "XLATOR (%s)", buf); + + if (ctf_type_name(dnp->dn_xlator->dx_src_ctfp, + dnp->dn_xlator->dx_src_type, n, sizeof (n)) != NULL) + (void) fprintf(fp, " from <%s>", n); + + if (ctf_type_name(dnp->dn_xlator->dx_dst_ctfp, + dnp->dn_xlator->dx_dst_type, n, sizeof (n)) != NULL) + (void) fprintf(fp, " to <%s>", n); + + (void) fprintf(fp, "\n"); + + for (arg = dnp->dn_members; arg != NULL; arg = arg->dn_list) + dt_node_printr(arg, fp, depth + 1); + break; + + case
DT_NODE_PROBE: + (void) fprintf(fp, "PROBE %s\n", dnp->dn_ident->di_name); + break; + + case DT_NODE_PROVIDER: + (void) fprintf(fp, "PROVIDER %s (%s)\n", + dnp->dn_provname, dnp->dn_provred ? "redecl" : "decl"); + for (arg = dnp->dn_probes; arg != NULL; arg = arg->dn_list) + dt_node_printr(arg, fp, depth + 1); + break; + + case DT_NODE_PROG: + (void) fprintf(fp, "PROGRAM attr=%s\n", a); + for (arg = dnp->dn_list; arg != NULL; arg = arg->dn_list) + dt_node_printr(arg, fp, depth + 1); + break; + + default: + (void) fprintf(fp, "<bad node %p, kind %d>\n", + (void *)dnp, dnp->dn_kind); + } +} + +/* + * Store dnp in yypcb->pcb_root and return zero. + */ +int +dt_node_root(dt_node_t *dnp) +{ + yypcb->pcb_root = dnp; + return (0); +} + +/* + * Record an error message with the given tag, using the line number of node + * dnp in place of yylineno while the message is recorded, and then longjmp() + * to yypcb->pcb_jmpbuf with EDT_COMPILER. Does not return to the caller. + */ +/*PRINTFLIKE3*/ +void +dnerror(const dt_node_t *dnp, dt_errtag_t tag, const char *format, ...) +{ + int oldlineno = yylineno; + va_list ap; + + yylineno = dnp->dn_line; + + va_start(ap, format); + xyvwarn(tag, format, ap); + va_end(ap); + + yylineno = oldlineno; + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); +} + +/* + * Record a message exactly as dnerror() does, but return to the caller + * instead of jumping. + */ +/*PRINTFLIKE3*/ +void +dnwarn(const dt_node_t *dnp, dt_errtag_t tag, const char *format, ...) +{ + int oldlineno = yylineno; + va_list ap; + + yylineno = dnp->dn_line; + + va_start(ap, format); + xyvwarn(tag, format, ap); + va_end(ap); + + yylineno = oldlineno; +} + +/* + * Record an error message with the given tag at the current yylineno and then + * longjmp() to yypcb->pcb_jmpbuf with EDT_COMPILER. Does not return to the + * caller. + */ +/*PRINTFLIKE2*/ +void +xyerror(dt_errtag_t tag, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + xyvwarn(tag, format, ap); + va_end(ap); + + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); +} + +/* + * Record a message with the given tag at the current yylineno and return. + */ +/*PRINTFLIKE2*/ +void +xywarn(dt_errtag_t tag, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + xyvwarn(tag, format, ap); + va_end(ap); +} + +/* + * va_list form used by dnerror(), dnwarn(), xyerror() and xywarn(): hand the + * message to dt_set_errmsg() together with the handle, the current region + * label, the file tag and, when pcb_fileptr is set, the current line number + * (zero otherwise). + */ +void +xyvwarn(dt_errtag_t tag, const char *format, va_list ap) +{ + if (yypcb == NULL) + return; /* compiler is not currently active: act as a no-op */ + + dt_set_errmsg(yypcb->pcb_hdl, dt_errtag(tag), yypcb->pcb_region, + yypcb->pcb_filetag, yypcb->pcb_fileptr ? yylineno : 0, format, ap); +} + +/* + * Record a message tagged D_SYNTAX through yyvwarn() and then longjmp() to + * yypcb->pcb_jmpbuf with EDT_COMPILER. Does not return to the caller. + */ +/*PRINTFLIKE1*/ +void +yyerror(const char *format, ...)
+{ + va_list ap; + + va_start(ap, format); + yyvwarn(format, ap); + va_end(ap); + + longjmp(yypcb->pcb_jmpbuf, EDT_COMPILER); +} + +/* + * Record a message tagged D_SYNTAX through yyvwarn() and return to the caller. + */ +/*PRINTFLIKE1*/ +void +yywarn(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + yyvwarn(format, ap); + va_end(ap); +} + +/* + * va_list form of yywarn(): hand the message to dt_set_errmsg() tagged as + * D_SYNTAX. If the format contains no newline, a description of the current + * token text (yytext) is appended to dt_errmsg in the handle. + */ +void +yyvwarn(const char *format, va_list ap) +{ + if (yypcb == NULL) + return; /* compiler is not currently active: act as a no-op */ + + dt_set_errmsg(yypcb->pcb_hdl, dt_errtag(D_SYNTAX), yypcb->pcb_region, + yypcb->pcb_filetag, yypcb->pcb_fileptr ? yylineno : 0, format, ap); + + if (strchr(format, '\n') == NULL) { + /* + * s and n describe the unused tail of dt_errmsg, so the text + * below is appended after whatever string is already there + * (presumably the message stored by dt_set_errmsg() above). + */ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + size_t len = strlen(dtp->dt_errmsg); + char *p, *s = dtp->dt_errmsg + len; + size_t n = sizeof (dtp->dt_errmsg) - len; + + if (yytext[0] == '\0') + (void) snprintf(s, n, " near end of input"); + else if (yytext[0] == '\n') + (void) snprintf(s, n, " near end of line"); + else { + if ((p = strchr(yytext, '\n')) != NULL) + *p = '\0'; /* crop at newline */ + (void) snprintf(s, n, " near \"%s\"", yytext); + } + } +} + +/* + * Set the region label (pcb_region) that xyvwarn() and yyvwarn() pass to + * dt_set_errmsg(). A NULL label is accepted. + */ +void +yylabel(const char *label) +{ + dt_dprintf("set label to <%s>\n", label ? label : "NULL"); + yypcb->pcb_region = label; +} + +int +yywrap(void) +{ + return (1); /* indicate that lex should return a zero token for EOF */ +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_parser.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_parser.h new file mode 100644 index 0000000..38f21c9 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_parser.h @@ -0,0 +1,287 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. + */ + +#ifndef _DT_PARSER_H +#define _DT_PARSER_H + +#include <sys/types.h> +#include <sys/dtrace.h> + +#include <libctf.h> +#include <stdarg.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include <dt_errtags.h> +#include <dt_ident.h> +#include <dt_decl.h> +#include <dt_xlator.h> +#include <dt_list.h> + +typedef struct dt_node { + ctf_file_t *dn_ctfp; /* CTF type container for node's type */ + ctf_id_t dn_type; /* CTF type reference for node's type */ + uchar_t dn_kind; /* node kind (DT_NODE_*, defined below) */ + uchar_t dn_flags; /* node flags (DT_NF_*, defined below) */ + ushort_t dn_op; /* operator (DT_TOK_*, defined by lex) */ + int dn_line; /* line number for error messages */ + int dn_reg; /* register allocated by cg */ + dtrace_attribute_t dn_attr; /* node stability attributes */ + + /* + * D compiler nodes, as is the usual style, contain a union of the + * different sub-elements required by the various kinds of nodes. + * These sub-elements are accessed using the macros defined below. 
+ */ + union { + struct { + uintmax_t _value; /* integer value */ + char *_string; /* string value */ + } _const; + + struct { + dt_ident_t *_ident; /* identifier reference */ + struct dt_node *_links[3]; /* child node pointers */ + } _nodes; + + struct { + struct dt_node *_descs; /* list of descriptions */ + struct dt_node *_pred; /* predicate expression */ + struct dt_node *_acts; /* action statement list */ + dt_idhash_t *_locals; /* local variable hash */ + dtrace_attribute_t _attr; /* context attributes */ + } _clause; + + struct { + char *_spec; /* specifier string (if any) */ + dtrace_probedesc_t *_desc; /* final probe description */ + } _pdesc; + + struct { + char *_name; /* string name of member */ + struct dt_node *_expr; /* expression node pointer */ + dt_xlator_t *_xlator; /* translator reference */ + uint_t _id; /* member identifier */ + } _member; + + struct { + dt_xlator_t *_xlator; /* translator reference */ + struct dt_node *_xmemb; /* individual xlator member */ + struct dt_node *_membs; /* list of member nodes */ + } _xlator; + + struct { + char *_name; /* string name of provider */ + struct dt_provider *_pvp; /* provider references */ + struct dt_node *_probes; /* list of probe nodes */ + int _redecl; /* provider redeclared */ + } _provider; + } dn_u; + + struct dt_node *dn_list; /* parse tree list link */ + struct dt_node *dn_link; /* allocation list link */ +} dt_node_t; + +#define dn_value dn_u._const._value /* DT_NODE_INT */ +#define dn_string dn_u._const._string /* STRING, IDENT, TYPE */ +#define dn_ident dn_u._nodes._ident /* VAR,SYM,FUN,AGG,INL,PROBE */ +#define dn_args dn_u._nodes._links[0] /* DT_NODE_VAR, FUNC */ +#define dn_child dn_u._nodes._links[0] /* DT_NODE_OP1 */ +#define dn_left dn_u._nodes._links[0] /* DT_NODE_OP2, OP3 */ +#define dn_right dn_u._nodes._links[1] /* DT_NODE_OP2, OP3 */ +#define dn_expr dn_u._nodes._links[2] /* DT_NODE_OP3, DEXPR */ +#define dn_aggfun dn_u._nodes._links[0] /* DT_NODE_AGG */ +#define dn_aggtup 
dn_u._nodes._links[1] /* DT_NODE_AGG */ +#define dn_pdescs dn_u._clause._descs /* DT_NODE_CLAUSE */ +#define dn_pred dn_u._clause._pred /* DT_NODE_CLAUSE */ +#define dn_acts dn_u._clause._acts /* DT_NODE_CLAUSE */ +#define dn_locals dn_u._clause._locals /* DT_NODE_CLAUSE */ +#define dn_ctxattr dn_u._clause._attr /* DT_NODE_CLAUSE */ +#define dn_spec dn_u._pdesc._spec /* DT_NODE_PDESC */ +#define dn_desc dn_u._pdesc._desc /* DT_NODE_PDESC */ +#define dn_membname dn_u._member._name /* DT_NODE_MEMBER */ +#define dn_membexpr dn_u._member._expr /* DT_NODE_MEMBER */ +#define dn_membxlator dn_u._member._xlator /* DT_NODE_MEMBER */ +#define dn_membid dn_u._member._id /* DT_NODE_MEMBER */ +#define dn_xlator dn_u._xlator._xlator /* DT_NODE_XLATOR */ +#define dn_xmember dn_u._xlator._xmemb /* DT_NODE_XLATOR */ +#define dn_members dn_u._xlator._membs /* DT_NODE_XLATOR */ +#define dn_provname dn_u._provider._name /* DT_NODE_PROVIDER */ +#define dn_provider dn_u._provider._pvp /* DT_NODE_PROVIDER */ +#define dn_provred dn_u._provider._redecl /* DT_NODE_PROVIDER */ +#define dn_probes dn_u._provider._probes /* DT_NODE_PROVIDER */ + +#define DT_NODE_FREE 0 /* unused node (waiting to be freed) */ +#define DT_NODE_INT 1 /* integer value */ +#define DT_NODE_STRING 2 /* string value */ +#define DT_NODE_IDENT 3 /* identifier */ +#define DT_NODE_VAR 4 /* variable reference */ +#define DT_NODE_SYM 5 /* symbol reference */ +#define DT_NODE_TYPE 6 /* type reference or formal parameter */ +#define DT_NODE_FUNC 7 /* function call */ +#define DT_NODE_OP1 8 /* unary operator */ +#define DT_NODE_OP2 9 /* binary operator */ +#define DT_NODE_OP3 10 /* ternary operator */ +#define DT_NODE_DEXPR 11 /* D expression action */ +#define DT_NODE_DFUNC 12 /* D function action */ +#define DT_NODE_AGG 13 /* aggregation */ +#define DT_NODE_PDESC 14 /* probe description */ +#define DT_NODE_CLAUSE 15 /* clause definition */ +#define DT_NODE_INLINE 16 /* inline definition */ +#define DT_NODE_MEMBER 17 /* member 
definition */ +#define DT_NODE_XLATOR 18 /* translator definition */ +#define DT_NODE_PROBE 19 /* probe definition */ +#define DT_NODE_PROVIDER 20 /* provider definition */ +#define DT_NODE_PROG 21 /* program translation unit */ + +#define DT_NF_SIGNED 0x01 /* data is a signed quantity (else unsigned) */ +#define DT_NF_COOKED 0x02 /* data is a known type (else still cooking) */ +#define DT_NF_REF 0x04 /* pass by reference (array, struct, union) */ +#define DT_NF_LVALUE 0x08 /* node is an l-value according to ANSI-C */ +#define DT_NF_WRITABLE 0x10 /* node is writable (can be modified) */ +#define DT_NF_BITFIELD 0x20 /* node is an integer bitfield */ +#define DT_NF_USERLAND 0x40 /* data is a userland address */ + +#define DT_TYPE_NAMELEN 128 /* reasonable size for ctf_type_name() */ + +extern int dt_node_is_integer(const dt_node_t *); +extern int dt_node_is_float(const dt_node_t *); +extern int dt_node_is_scalar(const dt_node_t *); +extern int dt_node_is_arith(const dt_node_t *); +extern int dt_node_is_vfptr(const dt_node_t *); +extern int dt_node_is_dynamic(const dt_node_t *); +extern int dt_node_is_stack(const dt_node_t *); +extern int dt_node_is_symaddr(const dt_node_t *); +extern int dt_node_is_usymaddr(const dt_node_t *); +extern int dt_node_is_string(const dt_node_t *); +extern int dt_node_is_strcompat(const dt_node_t *); +extern int dt_node_is_pointer(const dt_node_t *); +extern int dt_node_is_void(const dt_node_t *); +extern int dt_node_is_ptrcompat(const dt_node_t *, const dt_node_t *, + ctf_file_t **, ctf_id_t *); +extern int dt_node_is_argcompat(const dt_node_t *, const dt_node_t *); +extern int dt_node_is_posconst(const dt_node_t *); +extern int dt_node_is_actfunc(const dt_node_t *); + +extern dt_node_t *dt_node_int(uintmax_t); +extern dt_node_t *dt_node_string(char *); +extern dt_node_t *dt_node_ident(char *); +extern dt_node_t *dt_node_type(dt_decl_t *); +extern dt_node_t *dt_node_vatype(void); +extern dt_node_t *dt_node_decl(void); +extern dt_node_t 
*dt_node_func(dt_node_t *, dt_node_t *); +extern dt_node_t *dt_node_offsetof(dt_decl_t *, char *); +extern dt_node_t *dt_node_op1(int, dt_node_t *); +extern dt_node_t *dt_node_op2(int, dt_node_t *, dt_node_t *); +extern dt_node_t *dt_node_op3(dt_node_t *, dt_node_t *, dt_node_t *); +extern dt_node_t *dt_node_statement(dt_node_t *); +extern dt_node_t *dt_node_pdesc_by_name(char *); +extern dt_node_t *dt_node_pdesc_by_id(uintmax_t); +extern dt_node_t *dt_node_clause(dt_node_t *, dt_node_t *, dt_node_t *); +extern dt_node_t *dt_node_inline(dt_node_t *); +extern dt_node_t *dt_node_member(dt_decl_t *, char *, dt_node_t *); +extern dt_node_t *dt_node_xlator(dt_decl_t *, dt_decl_t *, char *, dt_node_t *); +extern dt_node_t *dt_node_probe(char *, int, dt_node_t *, dt_node_t *); +extern dt_node_t *dt_node_provider(char *, dt_node_t *); +extern dt_node_t *dt_node_program(dt_node_t *); + +extern dt_node_t *dt_node_link(dt_node_t *, dt_node_t *); +extern dt_node_t *dt_node_cook(dt_node_t *, uint_t); + +extern dt_node_t *dt_node_xalloc(dtrace_hdl_t *, int); +extern void dt_node_free(dt_node_t *); + +extern dtrace_attribute_t dt_node_list_cook(dt_node_t **, uint_t); +extern void dt_node_list_free(dt_node_t **); +extern void dt_node_link_free(dt_node_t **); + +extern void dt_node_attr_assign(dt_node_t *, dtrace_attribute_t); +extern void dt_node_type_assign(dt_node_t *, ctf_file_t *, ctf_id_t, boolean_t); +extern void dt_node_type_propagate(const dt_node_t *, dt_node_t *); +extern const char *dt_node_type_name(const dt_node_t *, char *, size_t); +extern size_t dt_node_type_size(const dt_node_t *); + +extern dt_ident_t *dt_node_resolve(const dt_node_t *, uint_t); +extern size_t dt_node_sizeof(const dt_node_t *); +extern void dt_node_promote(dt_node_t *, dt_node_t *, dt_node_t *); + +extern void dt_node_diftype(dtrace_hdl_t *, + const dt_node_t *, dtrace_diftype_t *); +extern void dt_node_printr(dt_node_t *, FILE *, int); +extern const char *dt_node_name(const dt_node_t *, char *, 
size_t); +extern int dt_node_root(dt_node_t *); + +struct dtrace_typeinfo; /* see <dtrace.h> */ +struct dt_pcb; /* see <dt_impl.h> */ + +#define IS_CHAR(e) \ + (((e).cte_format & (CTF_INT_CHAR | CTF_INT_SIGNED)) == \ + (CTF_INT_CHAR | CTF_INT_SIGNED) && (e).cte_bits == NBBY) + +#define IS_VOID(e) \ + ((e).cte_offset == 0 && (e).cte_bits == 0) + +extern int dt_type_lookup(const char *, struct dtrace_typeinfo *); +extern int dt_type_pointer(struct dtrace_typeinfo *); +extern const char *dt_type_name(ctf_file_t *, ctf_id_t, char *, size_t); + +typedef enum { + YYS_CLAUSE, /* lex/yacc state for finding program clauses */ + YYS_DEFINE, /* lex/yacc state for parsing persistent definitions */ + YYS_EXPR, /* lex/yacc state for parsing D expressions */ + YYS_DONE, /* lex/yacc state for indicating parse tree is done */ + YYS_CONTROL /* lex/yacc state for parsing control lines */ +} yystate_t; + +extern void dnerror(const dt_node_t *, dt_errtag_t, const char *, ...); +extern void dnwarn(const dt_node_t *, dt_errtag_t, const char *, ...); + +extern void xyerror(dt_errtag_t, const char *, ...); +extern void xywarn(dt_errtag_t, const char *, ...); +extern void xyvwarn(dt_errtag_t, const char *, va_list); + +extern void yyerror(const char *, ...); +extern void yywarn(const char *, ...); +extern void yyvwarn(const char *, va_list); + +extern void yylabel(const char *); +extern void yybegin(yystate_t); +extern void yyinit(struct dt_pcb *); + +extern int yyparse(void); +extern int yyinput(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_PARSER_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pcb.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pcb.c new file mode 100644 index 0000000..d80c359 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pcb.c @@ -0,0 +1,187 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the 
"License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * DTrace Parsing Control Block + * + * A DTrace Parsing Control Block (PCB) contains all of the state that is used + * by a single pass of the D compiler, other than the global variables used by + * lex and yacc. The routines in this file are used to set up and tear down + * PCBs, which are kept on a stack pointed to by the libdtrace global 'yypcb'. + * The main engine of the compiler, dt_compile(), is located in dt_cc.c and is + * responsible for calling these routines to begin and end a compilation pass. + * + * Sun's lex/yacc are not MT-safe or re-entrant, but we permit limited nested + * use of dt_compile() once the entire parse tree has been constructed but has + * not yet executed the "cooking" pass (see dt_cc.c for more information). The + * PCB design also makes it easier to debug (since all global state is kept in + * one place) and could permit us to make the D compiler MT-safe or re-entrant + * in the future by adding locks to libdtrace or switching to Flex and Bison. 
+ */
+
+#include <strings.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include <dt_impl.h>
+#include <dt_program.h>
+#include <dt_provider.h>
+#include <dt_pcb.h>
+
+/*
+ * Initialize the specified PCB by zeroing it and filling in a few default
+ * members, and then pushing it on to the top of the PCB stack and setting
+ * yypcb to point to it. Increment the current handle's generation count.
+ */
+void
+dt_pcb_push(dtrace_hdl_t *dtp, dt_pcb_t *pcb)
+{
+	/*
+	 * Since lex/yacc are not re-entrant and we don't implement state save,
+	 * assert that if another PCB is active, it is from the same handle and
+	 * has completed execution of yyparse(). If the first assertion fires,
+	 * the caller is calling libdtrace without proper MT locking. If the
+	 * second assertion fires, dt_compile() is being called recursively
+	 * from an illegal location in libdtrace, or a dt_pcb_pop() is missing.
+	 */
+	if (yypcb != NULL) {
+		assert(yypcb->pcb_hdl == dtp);
+		assert(yypcb->pcb_yystate == YYS_DONE);
+	}
+
+	bzero(pcb, sizeof (dt_pcb_t));
+
+	/* set up the PCB's declaration stack, globals stack, and IR list */
+	dt_scope_create(&pcb->pcb_dstack);
+	dt_idstack_push(&pcb->pcb_globals, dtp->dt_globals);
+	dt_irlist_create(&pcb->pcb_ir);
+
+	/* link the new PCB in front of the handle's current PCB (if any) */
+	pcb->pcb_hdl = dtp;
+	pcb->pcb_prev = dtp->dt_pcb;
+
+	dtp->dt_pcb = pcb;
+	dtp->dt_gen++;
+
+	/*
+	 * NOTE(review): yypcb is not assigned directly in this function;
+	 * presumably yyinit() installs pcb as yypcb -- confirm.
+	 */
+	yyinit(pcb);
+}
+
+/*
+ * dt_idhash_iter() callback used by dt_pcb_pop(): delete identifier idp from
+ * hash dhp if its generation (di_gen) matches the current generation of the
+ * handle passed as arg. Always returns zero.
+ */
+static int
+dt_pcb_pop_ident(dt_idhash_t *dhp, dt_ident_t *idp, void *arg)
+{
+	dtrace_hdl_t *dtp = arg;
+
+	if (idp->di_gen == dtp->dt_gen)
+		dt_idhash_delete(dhp, idp);
+
+	return (0);
+}
+
+/*
+ * Pop the topmost PCB from the PCB stack and destroy any data structures that
+ * are associated with it. If 'err' is non-zero, destroy any intermediate
+ * state that is left behind as part of a compilation that has failed.
+ */
+void
+dt_pcb_pop(dtrace_hdl_t *dtp, int err)
+{
+	dt_pcb_t *pcb = yypcb;
+	uint_t i;
+
+	/* the PCB being popped must be the handle's current PCB */
+	assert(pcb != NULL);
+	assert(pcb == dtp->dt_pcb);
+
+	/* pop scopes until the PCB's declaration stack has no next entry */
+	while (pcb->pcb_dstack.ds_next != NULL)
+		(void) dt_scope_pop();
+
+	dt_scope_destroy(&pcb->pcb_dstack);
+	dt_irlist_destroy(&pcb->pcb_ir);
+
+	dt_node_link_free(&pcb->pcb_list);
+	dt_node_link_free(&pcb->pcb_hold);
+
+	if (err != 0) {
+		dt_xlator_t *dxp, *nxp;
+		dt_provider_t *pvp, *nvp;
+
+		if (pcb->pcb_prog != NULL)
+			dt_program_destroy(dtp, pcb->pcb_prog);
+		if (pcb->pcb_stmt != NULL)
+			dtrace_stmt_destroy(dtp, pcb->pcb_stmt);
+		if (pcb->pcb_ecbdesc != NULL)
+			dt_ecbdesc_release(dtp, pcb->pcb_ecbdesc);
+
+		/*
+		 * Destroy translators and providers stamped with the current
+		 * generation. The next list element is fetched before the
+		 * current one is (possibly) destroyed.
+		 */
+		for (dxp = dt_list_next(&dtp->dt_xlators); dxp; dxp = nxp) {
+			nxp = dt_list_next(dxp);
+			if (dxp->dx_gen == dtp->dt_gen)
+				dt_xlator_destroy(dtp, dxp);
+		}
+
+		for (pvp = dt_list_next(&dtp->dt_provlist); pvp; pvp = nvp) {
+			nvp = dt_list_next(pvp);
+			if (pvp->pv_gen == dtp->dt_gen)
+				dt_provider_destroy(dtp, pvp);
+		}
+
+		/*
+		 * Delete identifiers of the current generation from the
+		 * aggregation, global, and thread-local hashes.
+		 */
+		(void) dt_idhash_iter(dtp->dt_aggs, dt_pcb_pop_ident, dtp);
+		dt_idhash_update(dtp->dt_aggs);
+
+		(void) dt_idhash_iter(dtp->dt_globals, dt_pcb_pop_ident, dtp);
+		dt_idhash_update(dtp->dt_globals);
+
+		(void) dt_idhash_iter(dtp->dt_tls, dt_pcb_pop_ident, dtp);
+		dt_idhash_update(dtp->dt_tls);
+
+		(void) ctf_discard(dtp->dt_cdefs->dm_ctfp);
+		(void) ctf_discard(dtp->dt_ddefs->dm_ctfp);
+	}
+
+	/* the remaining teardown happens regardless of err */
+	if (pcb->pcb_pragmas != NULL)
+		dt_idhash_destroy(pcb->pcb_pragmas);
+	if (pcb->pcb_locals != NULL)
+		dt_idhash_destroy(pcb->pcb_locals);
+	if (pcb->pcb_idents != NULL)
+		dt_idhash_destroy(pcb->pcb_idents);
+	if (pcb->pcb_inttab != NULL)
+		dt_inttab_destroy(pcb->pcb_inttab);
+	if (pcb->pcb_strtab != NULL)
+		dt_strtab_destroy(pcb->pcb_strtab);
+	if (pcb->pcb_regs != NULL)
+		dt_regset_destroy(pcb->pcb_regs);
+
+	for (i = 0; i < pcb->pcb_asxreflen; i++)
+		dt_free(dtp, pcb->pcb_asxrefs[i]);
+
+	dt_free(dtp, pcb->pcb_asxrefs);
+	dt_difo_free(dtp, pcb->pcb_difo);
+
+	free(pcb->pcb_filetag);
+	free(pcb->pcb_sflagv);
+
+	/*
+	 * Unlink the PCB from the handle, scrub it, and re-initialize the
+	 * lexer for the previous PCB (dtp->dt_pcb is whatever pcb_prev held).
+	 */
+	dtp->dt_pcb = pcb->pcb_prev;
+	bzero(pcb, sizeof (dt_pcb_t));
+	yyinit(dtp->dt_pcb);
+}
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pcb.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pcb.h
new file mode 100644
index 0000000..0ba2c6b
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pcb.h
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#ifndef _DT_PCB_H +#define _DT_PCB_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <dtrace.h> +#include <setjmp.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include <dt_parser.h> +#include <dt_regset.h> +#include <dt_inttab.h> +#include <dt_strtab.h> +#include <dt_decl.h> +#include <dt_as.h> + +typedef struct dt_pcb { + dtrace_hdl_t *pcb_hdl; /* pointer to library handle */ + struct dt_pcb *pcb_prev; /* pointer to previous pcb in stack */ + FILE *pcb_fileptr; /* pointer to input file (or NULL) */ + char *pcb_filetag; /* optional file name string (or NULL) */ + const char *pcb_string; /* pointer to input string (or NULL) */ + const char *pcb_strptr; /* pointer to input position */ + size_t pcb_strlen; /* length of pcb_string */ + int pcb_sargc; /* number of script arguments (if any) */ + char *const *pcb_sargv; /* script argument strings (if any) */ + ushort_t *pcb_sflagv; /* script argument flags (DT_IDFLG_* bits) */ + dt_scope_t pcb_dstack; /* declaration processing stack */ + dt_node_t *pcb_list; /* list of allocated parse tree nodes */ + dt_node_t *pcb_hold; /* parse tree nodes on hold until end of defn */ + dt_node_t *pcb_root; /* root of current parse tree */ + dt_idstack_t pcb_globals; /* stack of global identifier hash tables */ + dt_idhash_t *pcb_locals; /* current hash table of local identifiers */ + dt_idhash_t *pcb_idents; /* current hash table of ambiguous idents */ + dt_idhash_t *pcb_pragmas; /* current hash table of pending pragmas */ + dt_inttab_t *pcb_inttab; /* integer table for constant references */ + dt_strtab_t *pcb_strtab; /* string table for string references */ + dt_regset_t *pcb_regs; /* register set for code generation */ + dt_irlist_t pcb_ir; /* list of unrelocated IR instructions */ + uint_t pcb_asvidx; /* assembler vartab index (see dt_as.c) */ + ulong_t **pcb_asxrefs; /* assembler imported xlators (see dt_as.c) */ + uint_t pcb_asxreflen; /* assembler xlator map length (see dt_as.c) */ + const 
dtrace_probedesc_t *pcb_pdesc; /* probedesc for current context */ + struct dt_probe *pcb_probe; /* probe associated with current context */ + dtrace_probeinfo_t pcb_pinfo; /* info associated with current context */ + dtrace_attribute_t pcb_amin; /* stability minimum for compilation */ + dt_node_t *pcb_dret; /* node containing return type for assembler */ + dtrace_difo_t *pcb_difo; /* intermediate DIF object made by assembler */ + dtrace_prog_t *pcb_prog; /* intermediate program made by compiler */ + dtrace_stmtdesc_t *pcb_stmt; /* intermediate stmt made by compiler */ + dtrace_ecbdesc_t *pcb_ecbdesc; /* intermediate ecbdesc made by cmplr */ + jmp_buf pcb_jmpbuf; /* setjmp(3C) buffer for error return */ + const char *pcb_region; /* optional region name for yyerror() suffix */ + dtrace_probespec_t pcb_pspec; /* probe description evaluation context */ + uint_t pcb_cflags; /* optional compilation flags (see dtrace.h) */ + uint_t pcb_idepth; /* preprocessor #include nesting depth */ + yystate_t pcb_yystate; /* lex/yacc parsing state (see yybegin()) */ + int pcb_context; /* yyparse() rules context (DT_CTX_* value) */ + int pcb_token; /* token to be returned by yylex() (if != 0) */ + int pcb_cstate; /* state to be restored by lexer at state end */ + int pcb_braces; /* number of open curly braces in lexer */ + int pcb_brackets; /* number of open square brackets in lexer */ + int pcb_parens; /* number of open parentheses in lexer */ +} dt_pcb_t; + +extern void dt_pcb_push(dtrace_hdl_t *, dt_pcb_t *); +extern void dt_pcb_pop(dtrace_hdl_t *, int); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_PCB_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pid.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pid.c new file mode 100644 index 0000000..6c529e5 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pid.c @@ -0,0 +1,983 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development 
and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#include <assert.h> +#include <strings.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <ctype.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <libgen.h> +#include <stddef.h> +#include <sys/sysmacros.h> + +#include <dt_impl.h> +#include <dt_program.h> +#include <dt_pid.h> +#include <dt_string.h> +#if !defined(sun) +#include <libproc_compat.h> +#endif +#include <dt_module.h> + +typedef struct dt_pid_probe { + dtrace_hdl_t *dpp_dtp; + dt_pcb_t *dpp_pcb; + dt_proc_t *dpp_dpr; + struct ps_prochandle *dpp_pr; + const char *dpp_mod; + char *dpp_func; + const char *dpp_name; + const char *dpp_obj; + uintptr_t dpp_pc; + size_t dpp_size; + Lmid_t dpp_lmid; + uint_t dpp_nmatches; + uint64_t dpp_stret[4]; + GElf_Sym dpp_last; + uint_t dpp_last_taken; +} dt_pid_probe_t; + +/* + * Compose the lmid and object name into the canonical representation. We + * omit the lmid for the default link map for convenience. 
+ *
+ * The result is always NUL-terminated (and silently truncated if it does not
+ * fit) provided that len is non-zero.
+ */
+static void
+dt_pid_objname(char *buf, size_t len, Lmid_t lmid, const char *obj)
+{
+#if defined(sun)
+	if (lmid == LM_ID_BASE)
+		(void) strncpy(buf, obj, len);
+	else
+		(void) snprintf(buf, len, "LM%lx`%s", lmid, obj);
+#else
+	(void) strncpy(buf, obj, len);
+#endif
+
+	/*
+	 * strncpy() does not write a terminating NUL when obj is at least len
+	 * bytes long, so terminate explicitly. This is a harmless no-op for
+	 * the snprintf() path, which already terminates.
+	 */
+	if (len > 0)
+		buf[len - 1] = '\0';
+}
+
+/*
+ * Record an error for a failed pid-provider operation and return 1 so that
+ * callers can simply "return (dt_pid_error(...))".
+ *
+ * If ftp is non-NULL it is freed with dt_free(). If pcb is NULL, dpr must be
+ * non-NULL and the formatted message is stored in dpr->dpr_errmsg; otherwise
+ * the message is handed to dt_set_errmsg() with the given error tag and the
+ * PCB's region, file tag, and (if the PCB has an input file) yylineno.
+ */
+static int
+dt_pid_error(dtrace_hdl_t *dtp, dt_pcb_t *pcb, dt_proc_t *dpr,
+    fasttrap_probe_spec_t *ftp, dt_errtag_t tag, const char *fmt, ...)
+{
+	va_list ap;
+	int len;
+
+	if (ftp != NULL)
+		dt_free(dtp, ftp);
+
+	va_start(ap, fmt);
+	if (pcb == NULL) {
+		assert(dpr != NULL);
+		len = vsnprintf(dpr->dpr_errmsg, sizeof (dpr->dpr_errmsg),
+		    fmt, ap);
+		assert(len >= 2);
+
+		/*
+		 * vsnprintf() returns the length the full message would have
+		 * had, which exceeds the buffer when the message was
+		 * truncated (and is negative on error). Clamp it to the
+		 * length actually stored so the index below stays in bounds,
+		 * including in builds where assert() is compiled out.
+		 */
+		if (len >= (int)sizeof (dpr->dpr_errmsg))
+			len = (int)sizeof (dpr->dpr_errmsg) - 1;
+
+		if (len >= 2 && dpr->dpr_errmsg[len - 2] == '\n')
+			dpr->dpr_errmsg[len - 2] = '\0';
+	} else {
+		dt_set_errmsg(dtp, dt_errtag(tag), pcb->pcb_region,
+		    pcb->pcb_filetag, pcb->pcb_fileptr ? yylineno : 0, fmt, ap);
+	}
+	va_end(ap);
+
+	return (1);
+}
+
+/*
+ * Create the probe(s) selected by pp->dpp_name for the single function
+ * symbol symp, whose name is func ("-" denotes the special absolute-address
+ * function). On success pp->dpp_nmatches is increased by the number of
+ * probe specifications created and zero is returned; on failure an error is
+ * recorded (except for allocation failure) and 1 is returned.
+ */
+static int
+dt_pid_per_sym(dt_pid_probe_t *pp, const GElf_Sym *symp, const char *func)
+{
+	dtrace_hdl_t *dtp = pp->dpp_dtp;
+	dt_pcb_t *pcb = pp->dpp_pcb;
+	dt_proc_t *dpr = pp->dpp_dpr;
+	fasttrap_probe_spec_t *ftp;
+	uint64_t off;
+	char *end;
+	uint_t nmatches = 0;
+	ulong_t sz;
+	int glob, err;
+	int isdash = strcmp("-", func) == 0;
+	pid_t pid;
+
+#if defined(sun)
+	pid = Pstatus(pp->dpp_pr)->pr_pid;
+#else
+	pid = proc_getpid(pp->dpp_pr);
+#endif
+
+	dt_dprintf("creating probe pid%d:%s:%s:%s\n", (int)pid, pp->dpp_obj,
+	    func, pp->dpp_name);
+
+	sz = sizeof (fasttrap_probe_spec_t) + (isdash ?
4 : + (symp->st_size - 1) * sizeof (ftp->ftps_offs[0])); + + if ((ftp = dt_alloc(dtp, sz)) == NULL) { + dt_dprintf("proc_per_sym: dt_alloc(%lu) failed\n", sz); + return (1); /* errno is set for us */ + } + + ftp->ftps_pid = pid; + (void) strncpy(ftp->ftps_func, func, sizeof (ftp->ftps_func)); + + dt_pid_objname(ftp->ftps_mod, sizeof (ftp->ftps_mod), pp->dpp_lmid, + pp->dpp_obj); + + if (!isdash && gmatch("return", pp->dpp_name)) { + if (dt_pid_create_return_probe(pp->dpp_pr, dtp, ftp, symp, + pp->dpp_stret) < 0) { + return (dt_pid_error(dtp, pcb, dpr, ftp, + D_PROC_CREATEFAIL, "failed to create return probe " + "for '%s': %s", func, + dtrace_errmsg(dtp, dtrace_errno(dtp)))); + } + + nmatches++; + } + + if (!isdash && gmatch("entry", pp->dpp_name)) { + if (dt_pid_create_entry_probe(pp->dpp_pr, dtp, ftp, symp) < 0) { + return (dt_pid_error(dtp, pcb, dpr, ftp, + D_PROC_CREATEFAIL, "failed to create entry probe " + "for '%s': %s", func, + dtrace_errmsg(dtp, dtrace_errno(dtp)))); + } + + nmatches++; + } + + glob = strisglob(pp->dpp_name); + if (!glob && nmatches == 0) { + off = strtoull(pp->dpp_name, &end, 16); + if (*end != '\0') { + return (dt_pid_error(dtp, pcb, dpr, ftp, D_PROC_NAME, + "'%s' is an invalid probe name", pp->dpp_name)); + } + + if (off >= symp->st_size) { + return (dt_pid_error(dtp, pcb, dpr, ftp, D_PROC_OFF, + "offset 0x%llx outside of function '%s'", + (u_longlong_t)off, func)); + } + + err = dt_pid_create_offset_probe(pp->dpp_pr, pp->dpp_dtp, ftp, + symp, off); + + if (err == DT_PROC_ERR) { + return (dt_pid_error(dtp, pcb, dpr, ftp, + D_PROC_CREATEFAIL, "failed to create probe at " + "'%s+0x%llx': %s", func, (u_longlong_t)off, + dtrace_errmsg(dtp, dtrace_errno(dtp)))); + } + + if (err == DT_PROC_ALIGN) { + return (dt_pid_error(dtp, pcb, dpr, ftp, D_PROC_ALIGN, + "offset 0x%llx is not aligned on an instruction", + (u_longlong_t)off)); + } + + nmatches++; + + } else if (glob && !isdash) { + if (dt_pid_create_glob_offset_probes(pp->dpp_pr, + 
pp->dpp_dtp, ftp, symp, pp->dpp_name) < 0) {
+			return (dt_pid_error(dtp, pcb, dpr, ftp,
+			    D_PROC_CREATEFAIL,
+			    "failed to create offset probes in '%s': %s", func,
+			    dtrace_errmsg(dtp, dtrace_errno(dtp))));
+		}
+
+		nmatches++;
+	}
+
+	pp->dpp_nmatches += nmatches;
+
+	dt_free(dtp, ftp);
+
+	return (0);
+}
+
+/*
+ * Symbol iteration callback (see the Psymbol_iter_by_addr() calls in
+ * dt_pid_per_mod()). Undefined and zero-sized symbols are ignored, as is a
+ * symbol whose value and size are identical to those of the most recently
+ * taken symbol (pp->dpp_last). Any remaining symbol whose name matches the
+ * pattern pp->dpp_func is remembered in pp->dpp_last and passed to
+ * dt_pid_per_sym(), whose result is returned; otherwise zero is returned.
+ */
+static int
+dt_pid_sym_filt(void *arg, const GElf_Sym *symp, const char *func)
+{
+	dt_pid_probe_t *pp = arg;
+
+	if (symp->st_shndx == SHN_UNDEF)
+		return (0);
+
+	if (symp->st_size == 0) {
+		dt_dprintf("st_size of %s is zero\n", func);
+		return (0);
+	}
+
+	if (pp->dpp_last_taken == 0 ||
+	    symp->st_value != pp->dpp_last.st_value ||
+	    symp->st_size != pp->dpp_last.st_size) {
+		/*
+		 * Due to 4524008, _init and _fini may have a bloated st_size.
+		 * While this bug has been fixed for a while, old binaries
+		 * may exist that still exhibit this problem. As a result, we
+		 * don't match _init and _fini though we allow users to
+		 * specify them explicitly.
+		 */
+		if (strcmp(func, "_init") == 0 || strcmp(func, "_fini") == 0)
+			return (0);
+
+		/* dpp_last_taken records whether this symbol matched */
+		if ((pp->dpp_last_taken = gmatch(func, pp->dpp_func)) != 0) {
+			pp->dpp_last = *symp;
+			return (dt_pid_per_sym(pp, symp, func));
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Create probes in the single object obj (mapping pmp) for the probe
+ * description held in pp. pp->dpp_obj is set to the basename of obj, and
+ * pp->dpp_stret[] to the addresses of the ".stret1", ".stret2", ".stret4"
+ * and ".stret8" symbols (zero where a lookup fails, and always zero when
+ * not built for sun). A NULL obj is ignored. Returns zero, or non-zero if
+ * an error was reported.
+ */
+static int
+dt_pid_per_mod(void *arg, const prmap_t *pmp, const char *obj)
+{
+	dt_pid_probe_t *pp = arg;
+	dtrace_hdl_t *dtp = pp->dpp_dtp;
+	dt_pcb_t *pcb = pp->dpp_pcb;
+	dt_proc_t *dpr = pp->dpp_dpr;
+	GElf_Sym sym;
+
+	if (obj == NULL)
+		return (0);
+
+#if defined(sun)
+	(void) Plmid(pp->dpp_pr, pmp->pr_vaddr, &pp->dpp_lmid);
+#endif
+
+
+	/* use the final path component of obj as the object name */
+	if ((pp->dpp_obj = strrchr(obj, '/')) == NULL)
+		pp->dpp_obj = obj;
+	else
+		pp->dpp_obj++;
+#if defined(sun)
+	if (Pxlookup_by_name(pp->dpp_pr, pp->dpp_lmid, obj, ".stret1", &sym,
+	    NULL) == 0)
+		pp->dpp_stret[0] = sym.st_value;
+	else
+		pp->dpp_stret[0] = 0;
+
+	if (Pxlookup_by_name(pp->dpp_pr, pp->dpp_lmid, obj, ".stret2", &sym,
+	    NULL) == 0)
+		pp->dpp_stret[1] = sym.st_value;
+	else
+		pp->dpp_stret[1] = 0;
+
+	if
(Pxlookup_by_name(pp->dpp_pr, pp->dpp_lmid, obj, ".stret4", &sym, + NULL) == 0) + pp->dpp_stret[2] = sym.st_value; + else + pp->dpp_stret[2] = 0; + + if (Pxlookup_by_name(pp->dpp_pr, pp->dpp_lmid, obj, ".stret8", &sym, + NULL) == 0) + pp->dpp_stret[3] = sym.st_value; + else + pp->dpp_stret[3] = 0; +#else + pp->dpp_stret[0] = 0; + pp->dpp_stret[1] = 0; + pp->dpp_stret[2] = 0; + pp->dpp_stret[3] = 0; +#endif + + dt_dprintf("%s stret %llx %llx %llx %llx\n", obj, + (u_longlong_t)pp->dpp_stret[0], (u_longlong_t)pp->dpp_stret[1], + (u_longlong_t)pp->dpp_stret[2], (u_longlong_t)pp->dpp_stret[3]); + + /* + * If pp->dpp_func contains any globbing meta-characters, we need + * to iterate over the symbol table and compare each function name + * against the pattern. + */ + if (!strisglob(pp->dpp_func)) { + /* + * If we fail to lookup the symbol, try interpreting the + * function as the special "-" function that indicates that the + * probe name should be interpreted as a absolute virtual + * address. If that fails and we were matching a specific + * function in a specific module, report the error, otherwise + * just fail silently in the hopes that some other object will + * contain the desired symbol. + */ + if (Pxlookup_by_name(pp->dpp_pr, pp->dpp_lmid, obj, + pp->dpp_func, &sym, NULL) != 0) { + if (strcmp("-", pp->dpp_func) == 0) { + sym.st_name = 0; + sym.st_info = + GELF_ST_INFO(STB_LOCAL, STT_FUNC); + sym.st_other = 0; + sym.st_value = 0; +#if defined(sun) + sym.st_size = Pstatus(pp->dpp_pr)->pr_dmodel == + PR_MODEL_ILP32 ? -1U : -1ULL; +#else + sym.st_size = ~((Elf64_Xword) 0); +#endif + + } else if (!strisglob(pp->dpp_mod)) { + return (dt_pid_error(dtp, pcb, dpr, NULL, + D_PROC_FUNC, + "failed to lookup '%s' in module '%s'", + pp->dpp_func, pp->dpp_mod)); + } else { + return (0); + } + } + + /* + * Only match defined functions of non-zero size. 
+		 */
+		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC ||
+		    sym.st_shndx == SHN_UNDEF || sym.st_size == 0)
+			return (0);
+
+		/*
+		 * We don't instrument PLTs -- they're dynamically rewritten,
+		 * and, so, inherently dicey to instrument.
+		 */
+#ifdef DOODAD
+		if (Ppltdest(pp->dpp_pr, sym.st_value) != NULL)
+			return (0);
+#endif
+
+		/*
+		 * Note that this lookup writes up to DTRACE_FUNCNAMELEN bytes
+		 * into the pp->dpp_func buffer and refills sym; its result is
+		 * deliberately ignored.
+		 */
+		(void) Plookup_by_addr(pp->dpp_pr, sym.st_value, pp->dpp_func,
+		    DTRACE_FUNCNAMELEN, &sym);
+
+		return (dt_pid_per_sym(pp, &sym, pp->dpp_func));
+	} else {
+		/* snapshot the match count so we can tell if SYMTAB hit */
+		uint_t nmatches = pp->dpp_nmatches;
+
+		if (Psymbol_iter_by_addr(pp->dpp_pr, obj, PR_SYMTAB,
+		    BIND_ANY | TYPE_FUNC, dt_pid_sym_filt, pp) == 1)
+			return (1);
+
+		if (nmatches == pp->dpp_nmatches) {
+			/*
+			 * If we didn't match anything in the PR_SYMTAB, try
+			 * the PR_DYNSYM.
+			 */
+			if (Psymbol_iter_by_addr(pp->dpp_pr, obj, PR_DYNSYM,
+			    BIND_ANY | TYPE_FUNC, dt_pid_sym_filt, pp) == 1)
+				return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Object iteration callback (see the Pobject_iter() call in
+ * dt_pid_create_pid_probes()). The module pattern pp->dpp_mod is tried, in
+ * order, against the full object path, the object's basename, and the
+ * canonical name produced by dt_pid_objname(); on the first match the
+ * result of dt_pid_per_mod() is returned. Returns zero if nothing matches.
+ */
+static int
+dt_pid_mod_filt(void *arg, const prmap_t *pmp, const char *obj)
+{
+	char name[DTRACE_MODNAMELEN];
+	dt_pid_probe_t *pp = arg;
+
+	if (gmatch(obj, pp->dpp_mod))
+		return (dt_pid_per_mod(pp, pmp, obj));
+
+#if defined(sun)
+	(void) Plmid(pp->dpp_pr, pmp->pr_vaddr, &pp->dpp_lmid);
+#else
+	pp->dpp_lmid = 0;
+#endif
+
+	if ((pp->dpp_obj = strrchr(obj, '/')) == NULL)
+		pp->dpp_obj = obj;
+	else
+		pp->dpp_obj++;
+
+	if (gmatch(pp->dpp_obj, pp->dpp_mod))
+		return (dt_pid_per_mod(pp, pmp, obj));
+
+	/* NOTE(review): this repeats the Plmid() call made above -- confirm */
+#if defined(sun)
+	(void) Plmid(pp->dpp_pr, pmp->pr_vaddr, &pp->dpp_lmid);
+#endif
+
+	dt_pid_objname(name, sizeof (name), pp->dpp_lmid, pp->dpp_obj);
+
+	if (gmatch(name, pp->dpp_mod))
+		return (dt_pid_per_mod(pp, pmp, obj));
+
+	return (0);
+}
+
+/*
+ * Resolve the module named by pdp->dtpd_mod, which may carry a leading
+ * "LM<hex link-map id>`" prefix, to a mapping in process P. On success
+ * pdp->dtpd_mod is rewritten in the canonical form produced by
+ * dt_pid_objname() and the mapping is returned; NULL is returned if the
+ * name is malformed or no matching mapping is found.
+ */
+static const prmap_t *
+dt_pid_fix_mod(dtrace_probedesc_t *pdp, struct ps_prochandle *P)
+{
+	char m[MAXPATHLEN];
+	Lmid_t lmid = PR_LMID_EVERY;
+	const char *obj;
+	const prmap_t *pmp;
+
+	/*
+	 * Pick apart the link map from the library name.
+ */ + if (strchr(pdp->dtpd_mod, '`') != NULL) { + char *end; + + if (strncmp(pdp->dtpd_mod, "LM", 2) != 0 || + !isdigit(pdp->dtpd_mod[2])) + return (NULL); + + lmid = strtoul(&pdp->dtpd_mod[2], &end, 16); + + obj = end + 1; + + if (*end != '`' || strchr(obj, '`') != NULL) + return (NULL); + + } else { + obj = pdp->dtpd_mod; + } + + if ((pmp = Plmid_to_map(P, lmid, obj)) == NULL) + return (NULL); + + (void) Pobjname(P, pmp->pr_vaddr, m, sizeof (m)); + if ((obj = strrchr(m, '/')) == NULL) + obj = &m[0]; + else + obj++; + +#if defined(sun) + (void) Plmid(P, pmp->pr_vaddr, &lmid); +#endif + + dt_pid_objname(pdp->dtpd_mod, sizeof (pdp->dtpd_mod), lmid, obj); + + return (pmp); +} + + +static int +dt_pid_create_pid_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, + dt_pcb_t *pcb, dt_proc_t *dpr) +{ + dt_pid_probe_t pp; + int ret = 0; + + pp.dpp_dtp = dtp; + pp.dpp_dpr = dpr; + pp.dpp_pr = dpr->dpr_proc; + pp.dpp_pcb = pcb; + +#ifdef DOODAD + /* + * We can only trace dynamically-linked executables (since we've + * hidden some magic in ld.so.1 as well as libc.so.1). + */ + if (Pname_to_map(pp.dpp_pr, PR_OBJ_LDSO) == NULL) { + return (dt_pid_error(dtp, pcb, dpr, NULL, D_PROC_DYN, + "process %s is not a dynamically-linked executable", + &pdp->dtpd_provider[3])); + } +#endif + + pp.dpp_mod = pdp->dtpd_mod[0] != '\0' ? pdp->dtpd_mod : "*"; + pp.dpp_func = pdp->dtpd_func[0] != '\0' ? pdp->dtpd_func : "*"; + pp.dpp_name = pdp->dtpd_name[0] != '\0' ? 
pdp->dtpd_name : "*"; + pp.dpp_last_taken = 0; + + if (strcmp(pp.dpp_func, "-") == 0) { + const prmap_t *aout, *pmp; + + if (pdp->dtpd_mod[0] == '\0') { + pp.dpp_mod = pdp->dtpd_mod; + (void) strcpy(pdp->dtpd_mod, "a.out"); + } else if (strisglob(pp.dpp_mod) || + (aout = Pname_to_map(pp.dpp_pr, "a.out")) == NULL || + (pmp = Pname_to_map(pp.dpp_pr, pp.dpp_mod)) == NULL || + aout->pr_vaddr != pmp->pr_vaddr) { + return (dt_pid_error(dtp, pcb, dpr, NULL, D_PROC_LIB, + "only the a.out module is valid with the " + "'-' function")); + } + + if (strisglob(pp.dpp_name)) { + return (dt_pid_error(dtp, pcb, dpr, NULL, D_PROC_NAME, + "only individual addresses may be specified " + "with the '-' function")); + } + } + + /* + * If pp.dpp_mod contains any globbing meta-characters, we need + * to iterate over each module and compare its name against the + * pattern. An empty module name is treated as '*'. + */ + if (strisglob(pp.dpp_mod)) { + ret = Pobject_iter(pp.dpp_pr, dt_pid_mod_filt, &pp); + } else { + const prmap_t *pmp; + char *obj; + + /* + * If we can't find a matching module, don't sweat it -- either + * we'll fail the enabling because the probes don't exist or + * we'll wait for that module to come along. + */ + if ((pmp = dt_pid_fix_mod(pdp, pp.dpp_pr)) != NULL) { + if ((obj = strchr(pdp->dtpd_mod, '`')) == NULL) + obj = pdp->dtpd_mod; + else + obj++; + + ret = dt_pid_per_mod(&pp, pmp, obj); + } + } + + return (ret); +} + +static int +dt_pid_usdt_mapping(void *data, const prmap_t *pmp, const char *oname) +{ + struct ps_prochandle *P = data; + GElf_Sym sym; + prsyminfo_t sip; + dof_helper_t dh; + GElf_Half e_type; + const char *mname; + const char *syms[] = { "___SUNW_dof", "__SUNW_dof" }; + int i, fd = -1; + + /* + * The symbol ___SUNW_dof is for lazy-loaded DOF sections, and + * __SUNW_dof is for actively-loaded DOF sections. We try to force + * in both types of DOF section since the process may not yet have + * run the code to instantiate these providers. 
+ */ + for (i = 0; i < 2; i++) { + if (Pxlookup_by_name(P, PR_LMID_EVERY, oname, syms[i], &sym, + &sip) != 0) { + continue; + } + + if ((mname = strrchr(oname, '/')) == NULL) + mname = oname; + else + mname++; + + dt_dprintf("lookup of %s succeeded for %s\n", syms[i], mname); + + if (Pread(P, &e_type, sizeof (e_type), pmp->pr_vaddr + + offsetof(Elf64_Ehdr, e_type)) != sizeof (e_type)) { + dt_dprintf("read of ELF header failed"); + continue; + } + + dh.dofhp_dof = sym.st_value; + dh.dofhp_addr = (e_type == ET_EXEC) ? 0 : pmp->pr_vaddr; + + dt_pid_objname(dh.dofhp_mod, sizeof (dh.dofhp_mod), +#if defined(sun) + sip.prs_lmid, mname); +#else + 0, mname); +#endif + +#if defined(sun) + if (fd == -1 && + (fd = pr_open(P, "/dev/dtrace/helper", O_RDWR, 0)) < 0) { + dt_dprintf("pr_open of helper device failed: %s\n", + strerror(errno)); + return (-1); /* errno is set for us */ + } + + if (pr_ioctl(P, fd, DTRACEHIOC_ADDDOF, &dh, sizeof (dh)) < 0) + dt_dprintf("DOF was rejected for %s\n", dh.dofhp_mod); +#endif + } + +#if defined(sun) + if (fd != -1) + (void) pr_close(P, fd); +#endif + + return (0); +} + +static int +dt_pid_create_usdt_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, + dt_pcb_t *pcb, dt_proc_t *dpr) +{ + struct ps_prochandle *P = dpr->dpr_proc; + int ret = 0; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); +#if defined(sun) + (void) Pupdate_maps(P); + if (Pobject_iter(P, dt_pid_usdt_mapping, P) != 0) { + ret = -1; + (void) dt_pid_error(dtp, pcb, dpr, NULL, D_PROC_USDT, + "failed to instantiate probes for pid %d: %s", +#if defined(sun) + (int)Pstatus(P)->pr_pid, strerror(errno)); +#else + (int)proc_getpid(P), strerror(errno)); +#endif + } +#else + ret = 0; +#endif + + /* + * Put the module name in its canonical form. 
+ */ + (void) dt_pid_fix_mod(pdp, P); + + return (ret); +} + +static pid_t +dt_pid_get_pid(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb, + dt_proc_t *dpr) +{ + pid_t pid; + char *c, *last = NULL, *end; + + for (c = &pdp->dtpd_provider[0]; *c != '\0'; c++) { + if (!isdigit(*c)) + last = c; + } + + if (last == NULL || (*(++last) == '\0')) { + (void) dt_pid_error(dtp, pcb, dpr, NULL, D_PROC_BADPROV, + "'%s' is not a valid provider", pdp->dtpd_provider); + return (-1); + } + + errno = 0; + pid = strtol(last, &end, 10); + + if (errno != 0 || end == last || end[0] != '\0' || pid <= 0) { + (void) dt_pid_error(dtp, pcb, dpr, NULL, D_PROC_BADPID, + "'%s' does not contain a valid pid", pdp->dtpd_provider); + return (-1); + } + + return (pid); +} + +int +dt_pid_create_probes(dtrace_probedesc_t *pdp, dtrace_hdl_t *dtp, dt_pcb_t *pcb) +{ + char provname[DTRACE_PROVNAMELEN]; + struct ps_prochandle *P; + dt_proc_t *dpr; + pid_t pid; + int err = 0; + + assert(pcb != NULL); + + if ((pid = dt_pid_get_pid(pdp, dtp, pcb, NULL)) == -1) + return (-1); + + if (dtp->dt_ftfd == -1) { + if (dtp->dt_fterr == ENOENT) { + (void) dt_pid_error(dtp, pcb, NULL, NULL, D_PROC_NODEV, + "pid provider is not installed on this system"); + } else { + (void) dt_pid_error(dtp, pcb, NULL, NULL, D_PROC_NODEV, + "pid provider is not available: %s", + strerror(dtp->dt_fterr)); + } + + return (-1); + } + + (void) snprintf(provname, sizeof (provname), "pid%d", (int)pid); + + if (gmatch(provname, pdp->dtpd_provider) != 0) { + if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, + 0)) == NULL) { + (void) dt_pid_error(dtp, pcb, NULL, NULL, D_PROC_GRAB, + "failed to grab process %d", (int)pid); + return (-1); + } + + dpr = dt_proc_lookup(dtp, P, 0); + assert(dpr != NULL); + (void) pthread_mutex_lock(&dpr->dpr_lock); + + if ((err = dt_pid_create_pid_probes(pdp, dtp, pcb, dpr)) == 0) { + /* + * Alert other retained enablings which may match + * against the newly created probes. 
+ */ + (void) dt_ioctl(dtp, DTRACEIOC_ENABLE, NULL); + } + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + dt_proc_release(dtp, P); + } + + /* + * If it's not strictly a pid provider, we might match a USDT provider. + */ + if (strcmp(provname, pdp->dtpd_provider) != 0) { + if ((P = dt_proc_grab(dtp, pid, 0, 1)) == NULL) { + (void) dt_pid_error(dtp, pcb, NULL, NULL, D_PROC_GRAB, + "failed to grab process %d", (int)pid); + return (-1); + } + + dpr = dt_proc_lookup(dtp, P, 0); + assert(dpr != NULL); + (void) pthread_mutex_lock(&dpr->dpr_lock); + + if (!dpr->dpr_usdt) { + err = dt_pid_create_usdt_probes(pdp, dtp, pcb, dpr); + dpr->dpr_usdt = B_TRUE; + } + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + dt_proc_release(dtp, P); + } + + return (err ? -1 : 0); +} + +int +dt_pid_create_probes_module(dtrace_hdl_t *dtp, dt_proc_t *dpr) +{ + dtrace_enable_io_t args; + dtrace_prog_t *pgp; + dt_stmt_t *stp; + dtrace_probedesc_t *pdp, pd; + pid_t pid; + int ret = 0, found = B_FALSE; + char provname[DTRACE_PROVNAMELEN]; + + (void) snprintf(provname, sizeof (provname), "pid%d", + (int)dpr->dpr_pid); + + for (pgp = dt_list_next(&dtp->dt_programs); pgp != NULL; + pgp = dt_list_next(pgp)) { + + for (stp = dt_list_next(&pgp->dp_stmts); stp != NULL; + stp = dt_list_next(stp)) { + + pdp = &stp->ds_desc->dtsd_ecbdesc->dted_probe; + pid = dt_pid_get_pid(pdp, dtp, NULL, dpr); + if (pid != dpr->dpr_pid) + continue; + + found = B_TRUE; + + pd = *pdp; + + if (gmatch(provname, pdp->dtpd_provider) != 0 && + dt_pid_create_pid_probes(&pd, dtp, NULL, dpr) != 0) + ret = 1; + + /* + * If it's not strictly a pid provider, we might match + * a USDT provider. + */ + if (strcmp(provname, pdp->dtpd_provider) != 0 && + dt_pid_create_usdt_probes(&pd, dtp, NULL, dpr) != 0) + ret = 1; + } + } + + if (found) { + /* + * Give DTrace a shot to the ribs to get it to check + * out the newly created probes. 
+ */ + args.dof = NULL; + args.n_matched = 0; + (void) dt_ioctl(dtp, DTRACEIOC_ENABLE, &args); + } + + return (ret); +} + +/* + * libdtrace has a backroom deal with us to ask us for type information on + * behalf of pid provider probes when fasttrap doesn't return any type + * information. Instead we'll look up the module and see if there is type + * information available. However, if there is no type information available due + * to a lack of CTF data, then we want to make sure that DTrace still carries on + * in face of that. As such we don't have a meaningful exit code about failure. + * We emit information about why we failed to the dtrace debug log so someone + * can figure it out by asking nicely for DTRACE_DEBUG. + */ +void +dt_pid_get_types(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp, + dtrace_argdesc_t *adp, int *nargs) +{ + dt_module_t *dmp; + ctf_file_t *fp; + ctf_funcinfo_t f; + ctf_id_t argv[32]; + GElf_Sym sym; + prsyminfo_t si; + struct ps_prochandle *p; + int i, args; + char buf[DTRACE_ARGTYPELEN]; + const char *mptr; + char *eptr; + int ret = 0; + int argc = sizeof (argv) / sizeof (ctf_id_t); + Lmid_t lmid; + + /* Set up a potential outcome */ + args = *nargs; + *nargs = 0; + + /* + * If we don't have an entry or return probe then we can just stop right + * now as we don't have arguments for offset probes. + */ + if (strcmp(pdp->dtpd_name, "entry") != 0 && + strcmp(pdp->dtpd_name, "return") != 0) + return; + + dmp = dt_module_create(dtp, pdp->dtpd_provider); + if (dmp == NULL) { + dt_dprintf("failed to find module for %s\n", + pdp->dtpd_provider); + return; + } + if (dt_module_load(dtp, dmp) != 0) { + dt_dprintf("failed to load module for %s\n", + pdp->dtpd_provider); + return; + } + + /* + * We may be working with a module that doesn't have ctf. If that's the + * case then we just return now and move on with life. 
+ */ + fp = dt_module_getctflib(dtp, dmp, pdp->dtpd_mod); + if (fp == NULL) { + dt_dprintf("no ctf container for %s\n", + pdp->dtpd_mod); + return; + } + p = dt_proc_grab(dtp, dmp->dm_pid, 0, PGRAB_RDONLY | PGRAB_FORCE); + if (p == NULL) { + dt_dprintf("failed to grab pid\n"); + return; + } + dt_proc_lock(dtp, p); + + /* + * Check to see if the D module has a link map ID and separate that out + * for properly interrogating libproc. + */ + if ((mptr = strchr(pdp->dtpd_mod, '`')) != NULL) { + if (strlen(pdp->dtpd_mod) < 3) { + dt_dprintf("found weird modname with linkmap, " + "aborting: %s\n", pdp->dtpd_mod); + goto out; + } + if (pdp->dtpd_mod[0] != 'L' || pdp->dtpd_mod[1] != 'M') { + dt_dprintf("missing leading 'LM', " + "aborting: %s\n", pdp->dtpd_mod); + goto out; + } + errno = 0; + lmid = strtol(pdp->dtpd_mod + 2, &eptr, 16); + if (errno == ERANGE || eptr != mptr) { + dt_dprintf("failed to parse out lmid, aborting: %s\n", + pdp->dtpd_mod); + goto out; + } + mptr++; + } else { + mptr = pdp->dtpd_mod; + lmid = 0; + } + + if (Pxlookup_by_name(p, lmid, mptr, pdp->dtpd_func, + &sym, &si) != 0) { + dt_dprintf("failed to find function %s in %s`%s\n", + pdp->dtpd_func, pdp->dtpd_provider, pdp->dtpd_mod); + goto out; + } + if (ctf_func_info(fp, si.prs_id, &f) == CTF_ERR) { + dt_dprintf("failed to get ctf information for %s in %s`%s\n", + pdp->dtpd_func, pdp->dtpd_provider, pdp->dtpd_mod); + goto out; + } + + (void) snprintf(buf, sizeof (buf), "%s`%s", pdp->dtpd_provider, + pdp->dtpd_mod); + + if (strcmp(pdp->dtpd_name, "return") == 0) { + if (args < 2) + goto out; + + bzero(adp, sizeof (dtrace_argdesc_t)); + adp->dtargd_ndx = 0; + adp->dtargd_id = pdp->dtpd_id; + adp->dtargd_mapping = adp->dtargd_ndx; + /* + * We explicitly leave out the library here, we only care that + * it is some int. We are assuming that there is no ctf + * container in here that is lying about what an int is. 
+ */ + (void) snprintf(adp->dtargd_native, DTRACE_ARGTYPELEN, + "user %s`%s", pdp->dtpd_provider, "int"); + adp++; + bzero(adp, sizeof (dtrace_argdesc_t)); + adp->dtargd_ndx = 1; + adp->dtargd_id = pdp->dtpd_id; + adp->dtargd_mapping = adp->dtargd_ndx; + ret = snprintf(adp->dtargd_native, DTRACE_ARGTYPELEN, + "userland "); + (void) ctf_type_qname(fp, f.ctc_return, adp->dtargd_native + + ret, DTRACE_ARGTYPELEN - ret, buf); + *nargs = 2; + } else { + if (ctf_func_args(fp, si.prs_id, argc, argv) == CTF_ERR) + goto out; + + *nargs = MIN(args, f.ctc_argc); + for (i = 0; i < *nargs; i++, adp++) { + bzero(adp, sizeof (dtrace_argdesc_t)); + adp->dtargd_ndx = i; + adp->dtargd_id = pdp->dtpd_id; + adp->dtargd_mapping = adp->dtargd_ndx; + ret = snprintf(adp->dtargd_native, DTRACE_ARGTYPELEN, + "userland "); + (void) ctf_type_qname(fp, argv[i], adp->dtargd_native + + ret, DTRACE_ARGTYPELEN - ret, buf); + } + } +out: + dt_proc_unlock(dtp, p); + dt_proc_release(dtp, p); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pid.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pid.h new file mode 100644 index 0000000..4bf39c8 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pid.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
#ifndef _DT_PID_H
#define _DT_PID_H

#include <libproc.h>
#include <sys/fasttrap.h>
#include <dt_impl.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * NOTE(review): presumably the failure codes returned by the
 * dt_pid_create_*_probe() routines declared below -- their definitions are
 * not in this header, so confirm against the implementation.
 */
#define DT_PROC_ERR (-1)
#define DT_PROC_ALIGN (-2)

/*
 * Create the pid-provider and/or USDT probes matching a probe description
 * for the process encoded in its provider name.  Returns 0 on success and
 * -1 on failure.
 */
extern int dt_pid_create_probes(dtrace_probedesc_t *, dtrace_hdl_t *,
    dt_pcb_t *pcb);

/*
 * Re-evaluate every statement of every loaded program against the given
 * process.  Returns 0, or 1 if any probe creation failed.
 */
extern int dt_pid_create_probes_module(dtrace_hdl_t *, dt_proc_t *);

/*
 * Per-symbol probe creation hooks (entry, return, single offset, globbed
 * offsets).  NOTE(review): defined outside this header; see the
 * implementation for their return conventions.
 */
extern int dt_pid_create_entry_probe(struct ps_prochandle *, dtrace_hdl_t *,
    fasttrap_probe_spec_t *, const GElf_Sym *);

extern int dt_pid_create_return_probe(struct ps_prochandle *, dtrace_hdl_t *,
    fasttrap_probe_spec_t *, const GElf_Sym *, uint64_t *);

extern int dt_pid_create_offset_probe(struct ps_prochandle *, dtrace_hdl_t *,
    fasttrap_probe_spec_t *, const GElf_Sym *, ulong_t);

extern int dt_pid_create_glob_offset_probes(struct ps_prochandle *,
    dtrace_hdl_t *, fasttrap_probe_spec_t *, const GElf_Sym *, const char *);

/*
 * Fill in argument type descriptions for a pid-provider entry or return
 * probe from CTF data.  The int is in/out: slots available on entry, slots
 * filled on return (0 when no type information could be found).
 */
extern void dt_pid_get_types(dtrace_hdl_t *, const dtrace_probedesc_t *,
    dtrace_argdesc_t *, int *);

#ifdef __cplusplus
}
#endif

#endif /* _DT_PID_H */
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <dtrace.h> +#include <dt_impl.h> +#include <dt_pq.h> +#include <assert.h> + +/* + * Create a new priority queue. + * + * size is the maximum number of items that will be stored in the priority + * queue at one time. + */ +dt_pq_t * +dt_pq_init(dtrace_hdl_t *dtp, uint_t size, dt_pq_value_f value_cb, void *cb_arg) +{ + dt_pq_t *p; + assert(size > 1); + + if ((p = dt_zalloc(dtp, sizeof (dt_pq_t))) == NULL) + return (NULL); + + p->dtpq_items = dt_zalloc(dtp, size * sizeof (p->dtpq_items[0])); + if (p->dtpq_items == NULL) { + dt_free(dtp, p); + return (NULL); + } + + p->dtpq_hdl = dtp; + p->dtpq_size = size; + p->dtpq_last = 1; + p->dtpq_value = value_cb; + p->dtpq_arg = cb_arg; + + return (p); +} + +void +dt_pq_fini(dt_pq_t *p) +{ + dtrace_hdl_t *dtp = p->dtpq_hdl; + + dt_free(dtp, p->dtpq_items); + dt_free(dtp, p); +} + +static uint64_t +dt_pq_getvalue(dt_pq_t *p, uint_t index) +{ + void *item = p->dtpq_items[index]; + return (p->dtpq_value(item, p->dtpq_arg)); +} + +void +dt_pq_insert(dt_pq_t *p, void *item) +{ + uint_t i; + + assert(p->dtpq_last < p->dtpq_size); + + i = p->dtpq_last++; + p->dtpq_items[i] = item; + + while (i > 1 && dt_pq_getvalue(p, i) < dt_pq_getvalue(p, i / 2)) { + void *tmp = p->dtpq_items[i]; + p->dtpq_items[i] = p->dtpq_items[i / 2]; + p->dtpq_items[i / 2] = tmp; + i /= 2; + } +} + +/* + * Return elements from the priority queue. *cookie should be zero when first + * called. Returns NULL when there are no more elements. 
+ */ +void * +dt_pq_walk(dt_pq_t *p, uint_t *cookie) +{ + (*cookie)++; + if (*cookie >= p->dtpq_last) + return (NULL); + + return (p->dtpq_items[*cookie]); +} + +void * +dt_pq_pop(dt_pq_t *p) +{ + uint_t i = 1; + void *ret; + + assert(p->dtpq_last > 0); + + if (p->dtpq_last == 1) + return (NULL); + + ret = p->dtpq_items[1]; + + p->dtpq_last--; + p->dtpq_items[1] = p->dtpq_items[p->dtpq_last]; + p->dtpq_items[p->dtpq_last] = NULL; + + for (;;) { + uint_t lc = i * 2; + uint_t rc = i * 2 + 1; + uint_t c; + uint64_t v; + void *tmp; + + if (lc >= p->dtpq_last) + break; + + if (rc >= p->dtpq_last) { + c = lc; + v = dt_pq_getvalue(p, lc); + } else { + uint64_t lv = dt_pq_getvalue(p, lc); + uint64_t rv = dt_pq_getvalue(p, rc); + + if (lv < rv) { + c = lc; + v = lv; + } else { + c = rc; + v = rv; + } + } + + if (v >= dt_pq_getvalue(p, i)) + break; + + tmp = p->dtpq_items[i]; + p->dtpq_items[i] = p->dtpq_items[c]; + p->dtpq_items[c] = tmp; + + i = c; + } + + return (ret); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h new file mode 100644 index 0000000..8184a90 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
#ifndef _DT_PQ_H
#define _DT_PQ_H

#include <dtrace.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Ordering callback: invoked as value(item, arg), where arg is the cb_arg
 * given to dt_pq_init().  dt_pq_pop() hands back the item whose value is
 * smallest.
 */
typedef uint64_t (*dt_pq_value_f)(void *, void *);

/*
 * Array-backed binary heap.  Slot 0 of dtpq_items is unused: dt_pq_init()
 * starts dtpq_last at 1 and the root lives in slot 1, so at most
 * dtpq_size - 1 items can be stored.
 */
typedef struct dt_pq {
	dtrace_hdl_t *dtpq_hdl; /* dtrace handle */
	void **dtpq_items; /* array of elements */
	uint_t dtpq_size; /* count of allocated elements */
	uint_t dtpq_last; /* next free slot */
	dt_pq_value_f dtpq_value; /* callback to get the value */
	void *dtpq_arg; /* callback argument */
} dt_pq_t;

/* Lifecycle: init returns NULL on allocation failure. */
extern dt_pq_t *dt_pq_init(dtrace_hdl_t *, uint_t size, dt_pq_value_f, void *);
extern void dt_pq_fini(dt_pq_t *);

/*
 * insert adds an item, pop removes the smallest (NULL when empty), and walk
 * iterates in storage order using a cookie that must start at zero.
 */
extern void dt_pq_insert(dt_pq_t *, void *);
extern void *dt_pq_pop(dt_pq_t *);
extern void *dt_pq_walk(dt_pq_t *, uint_t *);

#ifdef __cplusplus
}
#endif

#endif /* _DT_PQ_H */
+ * Copyright (c) 2011, Joyent Inc. All rights reserved. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <strings.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> + +#include <sys/types.h> +#include <sys/sysctl.h> +#include <sys/stat.h> + +#include <dt_parser.h> +#include <dt_impl.h> +#include <dt_provider.h> +#include <dt_module.h> + +/* + * This callback function is installed in a given identifier hash to search for + * and apply deferred pragmas that are pending for a given new identifier name. + * Multiple pragmas may be pending for a given name; we processs all of them. + */ +/*ARGSUSED*/ +static void +dt_pragma_apply(dt_idhash_t *dhp, dt_ident_t *idp) +{ + dt_idhash_t *php; + dt_ident_t *pdp; + + if ((php = yypcb->pcb_pragmas) == NULL) + return; /* no pragmas pending for current compilation pass */ + + while ((pdp = dt_idhash_lookup(php, idp->di_name)) != NULL) { + switch (pdp->di_kind) { + case DT_IDENT_PRAGAT: + idp->di_attr = pdp->di_attr; + break; + case DT_IDENT_PRAGBN: + idp->di_vers = pdp->di_vers; + break; + } + dt_idhash_delete(php, pdp); + } +} + +/* + * The #pragma attributes directive can be used to reset stability attributes + * on a global identifier or inline definition. If the identifier is already + * defined, we can just change di_attr. If not, we insert the pragma into a + * hash table of the current pcb's deferred pragmas for later processing. 
+ */ +static void +dt_pragma_attributes(const char *prname, dt_node_t *dnp) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dtrace_attribute_t attr, *a; + dt_provider_t *pvp; + const char *name, *part; + dt_ident_t *idp; + + if (dnp == NULL || dnp->dn_kind != DT_NODE_IDENT || + dnp->dn_list == NULL || dnp->dn_list->dn_kind != DT_NODE_IDENT) { + xyerror(D_PRAGMA_MALFORM, "malformed #pragma %s " + "<attributes> <ident>\n", prname); + } + + if (dtrace_str2attr(dnp->dn_string, &attr) == -1) { + xyerror(D_PRAGMA_INVAL, "invalid attributes " + "specified by #pragma %s\n", prname); + } + + dnp = dnp->dn_list; + name = dnp->dn_string; + + if (strcmp(name, "provider") == 0) { + dnp = dnp->dn_list; + name = dnp->dn_string; + + dnp = dnp->dn_list; + part = dnp->dn_string; + + if ((pvp = dt_provider_lookup(dtp, name)) != NULL) { + if (strcmp(part, "provider") == 0) { + a = &pvp->pv_desc.dtvd_attr.dtpa_provider; + } else if (strcmp(part, "module") == 0) { + a = &pvp->pv_desc.dtvd_attr.dtpa_mod; + } else if (strcmp(part, "function") == 0) { + a = &pvp->pv_desc.dtvd_attr.dtpa_func; + } else if (strcmp(part, "name") == 0) { + a = &pvp->pv_desc.dtvd_attr.dtpa_name; + } else if (strcmp(part, "args") == 0) { + a = &pvp->pv_desc.dtvd_attr.dtpa_args; + } else { + xyerror(D_PRAGMA_INVAL, "invalid component " + "\"%s\" in attribute #pragma " + "for provider %s\n", name, part); + } + + *a = attr; + return; + } + + } else if ((idp = dt_idstack_lookup( + &yypcb->pcb_globals, name)) != NULL) { + + if (idp->di_gen != dtp->dt_gen) { + xyerror(D_PRAGMA_SCOPE, "#pragma %s cannot modify " + "entity defined outside program scope\n", prname); + } + + idp->di_attr = attr; + return; + } + + if (yypcb->pcb_pragmas == NULL && (yypcb->pcb_pragmas = + dt_idhash_create("pragma", NULL, 0, 0)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + idp = dt_idhash_insert(yypcb->pcb_pragmas, name, DT_IDENT_PRAGAT, 0, 0, + attr, 0, &dt_idops_thaw, (void *)prname, dtp->dt_gen); + + if (idp == NULL) + 
longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (dtp->dt_globals->dh_defer == NULL) + dtp->dt_globals->dh_defer = &dt_pragma_apply; +} + +/* + * The #pragma binding directive can be used to reset the version binding + * on a global identifier or inline definition. If the identifier is already + * defined, we can just change di_vers. If not, we insert the pragma into a + * hash table of the current pcb's deferred pragmas for later processing. + */ +static void +dt_pragma_binding(const char *prname, dt_node_t *dnp) +{ + dtrace_hdl_t *dtp = yypcb->pcb_hdl; + dt_version_t vers; + const char *name; + dt_ident_t *idp; + + if (dnp == NULL || dnp->dn_kind != DT_NODE_STRING || + dnp->dn_list == NULL || dnp->dn_list->dn_kind != DT_NODE_IDENT) { + xyerror(D_PRAGMA_MALFORM, "malformed #pragma %s " + "\"version\" <ident>\n", prname); + } + + if (dt_version_str2num(dnp->dn_string, &vers) == -1) { + xyerror(D_PRAGMA_INVAL, "invalid version string " + "specified by #pragma %s\n", prname); + } + + name = dnp->dn_list->dn_string; + idp = dt_idstack_lookup(&yypcb->pcb_globals, name); + + if (idp != NULL) { + if (idp->di_gen != dtp->dt_gen) { + xyerror(D_PRAGMA_SCOPE, "#pragma %s cannot modify " + "entity defined outside program scope\n", prname); + } + idp->di_vers = vers; + return; + } + + if (yypcb->pcb_pragmas == NULL && (yypcb->pcb_pragmas = + dt_idhash_create("pragma", NULL, 0, 0)) == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + idp = dt_idhash_insert(yypcb->pcb_pragmas, name, DT_IDENT_PRAGBN, 0, 0, + _dtrace_defattr, vers, &dt_idops_thaw, (void *)prname, dtp->dt_gen); + + if (idp == NULL) + longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM); + + if (dtp->dt_globals->dh_defer == NULL) + dtp->dt_globals->dh_defer = &dt_pragma_apply; +} + +static void +dt_pragma_depends_finddep(dtrace_hdl_t *dtp, const char *lname, char *lib, + size_t len) +{ + dt_dirpath_t *dirp; + struct stat sbuf; + int found = 0; + + for (dirp = dt_list_next(&dtp->dt_lib_path); dirp != NULL; + dirp = 
/*
 * The #pragma depends_on directive can be used to express a dependency on a
 * module, provider or library which if not present will cause processing to
 * abort.
 *
 * cnp is the <class> token ("provider", "module" or "library") and its
 * dn_list successor is the <name> token; both must be identifiers.  Every
 * failure is reported through xyerror().
 */
static void
dt_pragma_depends(const char *prname, dt_node_t *cnp)
{
	dtrace_hdl_t *dtp = yypcb->pcb_hdl;
	dt_node_t *nnp = cnp ? cnp->dn_list : NULL;
	int found;
	dt_lib_depend_t *dld;
	char lib[MAXPATHLEN];
	size_t plen;
	char *provs, *cpy, *tok;

	if (cnp == NULL || nnp == NULL ||
	    cnp->dn_kind != DT_NODE_IDENT || nnp->dn_kind != DT_NODE_IDENT) {
		xyerror(D_PRAGMA_MALFORM, "malformed #pragma %s "
		    "<class> <name>\n", prname);
	}

	if (strcmp(cnp->dn_string, "provider") == 0) {
		/*
		 * First try to get the provider list using the
		 * debug.dtrace.providers sysctl, since that'll work even if
		 * we're not running as root.
		 *
		 * The first sysctlbyname() call only asks for the required
		 * length; the second fetches the space-separated list.  If
		 * any step fails, or the name is not in the list, fall back
		 * to dt_provider_lookup().
		 */
		provs = NULL;
		if (sysctlbyname("debug.dtrace.providers", NULL, &plen, NULL, 0) ||
		    ((provs = dt_alloc(dtp, plen)) == NULL) ||
		    sysctlbyname("debug.dtrace.providers", provs, &plen, NULL, 0))
			found = dt_provider_lookup(dtp, nnp->dn_string) != NULL;
		else {
			found = B_FALSE;
			/* strsep() chops provs up in place as it tokenizes */
			for (cpy = provs; (tok = strsep(&cpy, " ")) != NULL; )
				if (strcmp(tok, nnp->dn_string) == 0) {
					found = B_TRUE;
					break;
				}
			if (found == B_FALSE)
				found = dt_provider_lookup(dtp,
				    nnp->dn_string) != NULL;
		}
		if (provs != NULL)
			dt_free(dtp, provs);
	} else if (strcmp(cnp->dn_string, "module") == 0) {
		/* a module only counts if CTF data can be obtained for it */
		dt_module_t *mp = dt_module_lookup_by_name(dtp, nnp->dn_string);
		found = mp != NULL && dt_module_getctf(dtp, mp) != NULL;
	} else if (strcmp(cnp->dn_string, "library") == 0) {
		if (yypcb->pcb_cflags & DTRACE_C_CTL) {
			/*
			 * DTRACE_C_CTL pass: record the dependency edge from
			 * the current file (dt_filetag) to the resolved
			 * library path.
			 */
			assert(dtp->dt_filetag != NULL);

			dt_pragma_depends_finddep(dtp, nnp->dn_string, lib,
			    sizeof (lib));

			dld = dt_lib_depend_lookup(&dtp->dt_lib_dep,
			    dtp->dt_filetag);
			assert(dld != NULL);

			if ((dt_lib_depend_add(dtp, &dld->dtld_dependencies,
			    lib)) != 0) {
				xyerror(D_PRAGMA_DEPEND,
				    "failed to add dependency %s:%s\n", lib,
				    dtrace_errmsg(dtp, dtrace_errno(dtp)));
			}
		} else {
			/*
			 * By this point we have already performed a topological
			 * sort of the dependencies; we process this directive
			 * as satisfied as long as the dependency was properly
			 * loaded.
			 */
			if (dtp->dt_filetag == NULL)
				xyerror(D_PRAGMA_DEPEND, "main program may "
				    "not explicitly depend on a library");

			/* this first lookup only feeds the assertion */
			dld = dt_lib_depend_lookup(&dtp->dt_lib_dep,
			    dtp->dt_filetag);
			assert(dld != NULL);

			dt_pragma_depends_finddep(dtp, nnp->dn_string, lib,
			    sizeof (lib));
			dld = dt_lib_depend_lookup(&dtp->dt_lib_dep_sorted,
			    lib);
			assert(dld != NULL);

			if (!dld->dtld_loaded)
				xyerror(D_PRAGMA_DEPEND, "program requires "
				    "library \"%s\" which failed to load",
				    lib);
		}

		found = B_TRUE;
	} else {
		xyerror(D_PRAGMA_INVAL, "invalid class %s "
		    "specified by #pragma %s\n", cnp->dn_string, prname);
	}

	if (!found) {
		xyerror(D_PRAGMA_DEPEND, "program requires %s %s\n",
		    cnp->dn_string, nnp->dn_string);
	}
}
prname); + } + + opt = alloca(strlen(dnp->dn_string) + 1); + (void) strcpy(opt, dnp->dn_string); + + if ((val = strchr(opt, '=')) != NULL) + *val++ = '\0'; + + if (dtrace_setopt(dtp, opt, val) == -1) { + if (val == NULL) { + xyerror(D_PRAGMA_OPTSET, + "failed to set option '%s': %s\n", opt, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + } else { + xyerror(D_PRAGMA_OPTSET, + "failed to set option '%s' to '%s': %s\n", + opt, val, dtrace_errmsg(dtp, dtrace_errno(dtp))); + } + } +} + +/* + * The #line directive is used to reset the input line number and to optionally + * note the file name for use in error messages. Sun cpp(1) also produces a + * third integer token after the filename which is one of the following: + * + * 0 - line change has nothing to do with an #include file + * 1 - line change because we just entered a #include file + * 2 - line change because we just exited a #include file + * + * We use these state tokens to adjust pcb_idepth, which in turn controls + * whether type lookups access the global type space or not. + */ +static void +dt_pragma_line(const char *prname, dt_node_t *dnp) +{ + dt_node_t *fnp = dnp ? dnp->dn_list : NULL; + dt_node_t *inp = fnp ? fnp->dn_list : NULL; + + if ((dnp == NULL || dnp->dn_kind != DT_NODE_INT) || + (fnp != NULL && fnp->dn_kind != DT_NODE_STRING) || + (inp != NULL && inp->dn_kind != DT_NODE_INT)) { + xyerror(D_PRAGMA_MALFORM, "malformed #%s " + "<line> [ [\"file\"] state ]\n", prname); + } + + /* + * If a file is specified, free any old pcb_filetag and swap fnp's + * dn_string into pcb_filetag as the new filename for error messages. + */ + if (fnp != NULL) { + if (yypcb->pcb_filetag != NULL) + free(yypcb->pcb_filetag); + + /* + * This is not pretty, but is a necessary evil until we either + * write "dpp" or get a useful standalone cpp from DevPro. 
If + * the filename begins with /dev/fd, we know it's the master + * input file (see dt_preproc() in dt_cc.c), so just clear the + * dt_filetag pointer so error messages refer to the main file. + */ + if (strncmp(fnp->dn_string, "/dev/fd/", 8) != 0) { + yypcb->pcb_filetag = fnp->dn_string; + fnp->dn_string = NULL; + } else + yypcb->pcb_filetag = NULL; + } + + if (inp != NULL) { + if (inp->dn_value == 1) + yypcb->pcb_idepth++; + else if (inp->dn_value == 2 && yypcb->pcb_idepth != 0) + yypcb->pcb_idepth--; + } + + yylineno = dnp->dn_value; +} + +/* + * D compiler pragma types range from control directives to common pragmas to + * D custom pragmas, in order of specificity. Similar to gcc, we use #pragma D + * as a special prefix for our pragmas so they can be used in mixed headers. + */ +#define DT_PRAGMA_DIR 0 /* pragma directive may be used after naked # */ +#define DT_PRAGMA_SUB 1 /* pragma directive may be used after #pragma */ +#define DT_PRAGMA_DCP 2 /* pragma may only be used after #pragma D */ + +static const struct dt_pragmadesc { + const char *dpd_name; + void (*dpd_func)(const char *, dt_node_t *); + int dpd_kind; +} dt_pragmas[] = { + { "attributes", dt_pragma_attributes, DT_PRAGMA_DCP }, + { "binding", dt_pragma_binding, DT_PRAGMA_DCP }, + { "depends_on", dt_pragma_depends, DT_PRAGMA_DCP }, + { "error", dt_pragma_error, DT_PRAGMA_DIR }, + { "ident", dt_pragma_ident, DT_PRAGMA_DIR }, + { "line", dt_pragma_line, DT_PRAGMA_DIR }, + { "option", dt_pragma_option, DT_PRAGMA_DCP }, + { NULL, NULL } +}; + +/* + * Process a control line #directive by looking up the directive name in our + * lookup table and invoking the corresponding function with the token list. + * According to K&R[A12.9], we silently ignore null directive lines. 
+ */ +void +dt_pragma(dt_node_t *pnp) +{ + const struct dt_pragmadesc *dpd; + dt_node_t *dnp; + int kind = DT_PRAGMA_DIR; + + for (dnp = pnp; dnp != NULL; dnp = dnp->dn_list) { + if (dnp->dn_kind == DT_NODE_INT) { + dt_pragma_line("line", dnp); + break; + } + + if (dnp->dn_kind != DT_NODE_IDENT) + xyerror(D_PRAGCTL_INVAL, "invalid control directive\n"); + + if (kind == DT_PRAGMA_DIR && + strcmp(dnp->dn_string, "pragma") == 0) { + kind = DT_PRAGMA_SUB; + continue; + } + + if (kind == DT_PRAGMA_SUB && + strcmp(dnp->dn_string, "D") == 0) { + kind = DT_PRAGMA_DCP; + continue; + } + + for (dpd = dt_pragmas; dpd->dpd_name != NULL; dpd++) { + if (dpd->dpd_kind <= kind && + strcmp(dpd->dpd_name, dnp->dn_string) == 0) + break; + } + + yylineno--; /* since we've already seen \n */ + + if (dpd->dpd_name != NULL) { + dpd->dpd_func(dpd->dpd_name, dnp->dn_list); + yylineno++; + break; + } + + switch (kind) { + case DT_PRAGMA_DIR: + xyerror(D_PRAGCTL_INVAL, "invalid control directive: " + "#%s\n", dnp->dn_string); + /*NOTREACHED*/ + case DT_PRAGMA_SUB: + break; /* K&R[A12.8] says to ignore unknown pragmas */ + case DT_PRAGMA_DCP: + default: + xyerror(D_PRAGMA_INVAL, "invalid D pragma: %s\n", + dnp->dn_string); + } + + yylineno++; + break; + } + + dt_node_list_free(&pnp); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c new file mode 100644 index 0000000..4be0f03 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_print.c @@ -0,0 +1,706 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2011 by Delphix. All rights reserved. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +/* + * DTrace print() action + * + * This file contains the post-processing logic for the print() action. The + * print action behaves identically to trace() in that it generates a + * DTRACEACT_DIFEXPR action, but the action argument field refers to a CTF type + * string stored in the DOF string table (similar to printf formats). We + * take the result of the trace action and post-process it in the fashion of + * MDB's ::print dcmd. + * + * This implementation differs from MDB's in the following ways: + * + * - We do not expose any options or flags. The behavior of print() is + * equivalent to "::print -tn". + * + * - MDB will display "holes" in structures (unused padding between + * members). + * + * - When printing arrays of structures, MDB will leave a trailing ',' + * after the last element. + * + * - MDB will print time_t types as date and time. + * + * - MDB will detect when an enum is actually the OR of several flags, + * and print it out with the constituent flags separated. + * + * - For large arrays, MDB will print the first few members and then + * print a "..." continuation line. + * + * - MDB will break and wrap arrays at 80 columns. + * + * - MDB prints out floats and doubles by hand, as it must run in kmdb + * context. 
We're able to leverage the printf() format strings, + * but the result is a slightly different format. + */ + +#include <sys/sysmacros.h> +#include <strings.h> +#include <stdlib.h> +#include <alloca.h> +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <sys/socket.h> +#include <netdb.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <arpa/nameser.h> + +#include <dt_module.h> +#include <dt_printf.h> +#include <dt_string.h> +#include <dt_impl.h> + +/* determines whether the given integer CTF encoding is a character */ +#define CTF_IS_CHAR(e) \ + (((e).cte_format & (CTF_INT_CHAR | CTF_INT_SIGNED)) == \ + (CTF_INT_CHAR | CTF_INT_SIGNED) && (e).cte_bits == NBBY) +/* determines whether the given CTF kind is a struct or union */ +#define CTF_IS_STRUCTLIKE(k) \ + ((k) == CTF_K_STRUCT || (k) == CTF_K_UNION) + +/* + * Print structure passed down recursively through printing algorithm. + */ +typedef struct dt_printarg { + dtrace_hdl_t *pa_dtp; /* libdtrace handle */ + caddr_t pa_addr; /* base address of trace data */ + ctf_file_t *pa_ctfp; /* CTF container */ + int pa_depth; /* member depth */ + int pa_nest; /* nested array depth */ + FILE *pa_file; /* output file */ +} dt_printarg_t; + +static int dt_print_member(const char *, ctf_id_t, ulong_t, int, void *); + +/* + * Safe version of ctf_type_name() that will fall back to just "<ctfid>" if it + * can't resolve the type. + */ +static void +dt_print_type_name(ctf_file_t *ctfp, ctf_id_t id, char *buf, size_t buflen) +{ + if (ctf_type_name(ctfp, id, buf, buflen) == NULL) + (void) snprintf(buf, buflen, "<%ld>", id); +} + +/* + * Print any necessary trailing braces for structures or unions. We don't get + * invoked when a struct or union ends, so we infer the need to print braces + * based on the depth the last time we printed something and the new depth. 
+ */ +static void +dt_print_trailing_braces(dt_printarg_t *pap, int depth) +{ + int d; + + for (d = pap->pa_depth; d > depth; d--) { + (void) fprintf(pap->pa_file, "%*s}%s", + (d + pap->pa_nest - 1) * 4, "", + d == depth + 1 ? "" : "\n"); + } +} + +/* + * Print the appropriate amount of indentation given the current depth and + * array nesting. + */ +static void +dt_print_indent(dt_printarg_t *pap) +{ + (void) fprintf(pap->pa_file, "%*s", + (pap->pa_depth + pap->pa_nest) * 4, ""); +} + +/* + * Print a bitfield. It's worth noting that the D compiler support for + * bitfields is currently broken; printing "D`user_desc_t" (pulled in by the + * various D provider files) will produce incorrect results compared to + * "genunix`user_desc_t". + */ +static void +print_bitfield(dt_printarg_t *pap, ulong_t off, ctf_encoding_t *ep) +{ + FILE *fp = pap->pa_file; + caddr_t addr = pap->pa_addr + off / NBBY; + uint64_t mask = (1ULL << ep->cte_bits) - 1; + uint64_t value = 0; + size_t size = (ep->cte_bits + (NBBY - 1)) / NBBY; + uint8_t *buf = (uint8_t *)&value; + uint8_t shift; + + /* + * On big-endian machines, we need to adjust the buf pointer to refer + * to the lowest 'size' bytes in 'value', and we need to shift based on + * the offset from the end of the data, not the offset of the start. + */ +#if BYTE_ORDER == _BIG_ENDIAN + buf += sizeof (value) - size; + off += ep->cte_bits; +#endif + bcopy(addr, buf, size); + shift = off % NBBY; + + /* + * Offsets are counted from opposite ends on little- and + * big-endian machines. + */ +#if BYTE_ORDER == _BIG_ENDIAN + shift = NBBY - shift; +#endif + + /* + * If the bits we want do not begin on a byte boundary, shift the data + * right so that the value is in the lowest 'cte_bits' of 'value'. + */ + if (off % NBBY != 0) + value >>= shift; + value &= mask; + + (void) fprintf(fp, "%#llx", (u_longlong_t)value); +} + +/* + * Dump the contents of memory as a fixed-size integer in hex. 
+ */ +static void +dt_print_hex(FILE *fp, caddr_t addr, size_t size) +{ + switch (size) { + case sizeof (uint8_t): + (void) fprintf(fp, "%#x", *(uint8_t *)addr); + break; + case sizeof (uint16_t): + /* LINTED - alignment */ + (void) fprintf(fp, "%#x", *(uint16_t *)addr); + break; + case sizeof (uint32_t): + /* LINTED - alignment */ + (void) fprintf(fp, "%#x", *(uint32_t *)addr); + break; + case sizeof (uint64_t): + (void) fprintf(fp, "%#llx", + /* LINTED - alignment */ + (unsigned long long)*(uint64_t *)addr); + break; + default: + (void) fprintf(fp, "<invalid size %u>", (uint_t)size); + } +} + +/* + * Print an integer type. Before dumping the contents via dt_print_hex(), we + * first check the encoding to see if it's part of a bitfield or a character. + */ +static void +dt_print_int(ctf_id_t base, ulong_t off, dt_printarg_t *pap) +{ + FILE *fp = pap->pa_file; + ctf_file_t *ctfp = pap->pa_ctfp; + ctf_encoding_t e; + size_t size; + caddr_t addr = pap->pa_addr + off / NBBY; + + if (ctf_type_encoding(ctfp, base, &e) == CTF_ERR) { + (void) fprintf(fp, "<unknown encoding>"); + return; + } + + /* + * This comes from MDB - it's not clear under what circumstances this + * would be found. + */ + if (e.cte_format & CTF_INT_VARARGS) { + (void) fprintf(fp, "..."); + return; + } + + /* + * We print this as a bitfield if the bit encoding indicates it's not + * an even power of two byte size, or is larger than 8 bytes. + */ + size = e.cte_bits / NBBY; + if (size > 8 || (e.cte_bits % NBBY) != 0 || (size & (size - 1)) != 0) { + print_bitfield(pap, off, &e); + return; + } + + /* + * If this is a character, print it out as such. + */ + if (CTF_IS_CHAR(e)) { + char c = *(char *)addr; + if (isprint(c)) + (void) fprintf(fp, "'%c'", c); + else if (c == 0) + (void) fprintf(fp, "'\\0'"); + else + (void) fprintf(fp, "'\\%03o'", c); + return; + } + + dt_print_hex(fp, addr, size); +} + +/* + * Print a floating point (float, double, long double) value. 
+ */ +/* ARGSUSED */ +static void +dt_print_float(ctf_id_t base, ulong_t off, dt_printarg_t *pap) +{ + FILE *fp = pap->pa_file; + ctf_file_t *ctfp = pap->pa_ctfp; + ctf_encoding_t e; + caddr_t addr = pap->pa_addr + off / NBBY; + + if (ctf_type_encoding(ctfp, base, &e) == 0) { + if (e.cte_format == CTF_FP_SINGLE && + e.cte_bits == sizeof (float) * NBBY) { + /* LINTED - alignment */ + (void) fprintf(fp, "%+.7e", *((float *)addr)); + } else if (e.cte_format == CTF_FP_DOUBLE && + e.cte_bits == sizeof (double) * NBBY) { + /* LINTED - alignment */ + (void) fprintf(fp, "%+.7e", *((double *)addr)); + } else if (e.cte_format == CTF_FP_LDOUBLE && + e.cte_bits == sizeof (long double) * NBBY) { + /* LINTED - alignment */ + (void) fprintf(fp, "%+.16LE", *((long double *)addr)); + } else { + (void) fprintf(fp, "<unknown encoding>"); + } + } +} + +/* + * A pointer is generally printed as a fixed-size integer. If we have a + * function pointer, we try to look up its name. + */ +static void +dt_print_ptr(ctf_id_t base, ulong_t off, dt_printarg_t *pap) +{ + FILE *fp = pap->pa_file; + ctf_file_t *ctfp = pap->pa_ctfp; + caddr_t addr = pap->pa_addr + off / NBBY; + size_t size = ctf_type_size(ctfp, base); + ctf_id_t bid = ctf_type_reference(ctfp, base); + uint64_t pc; + dtrace_syminfo_t dts; + GElf_Sym sym; + + if (bid == CTF_ERR || ctf_type_kind(ctfp, bid) != CTF_K_FUNCTION) { + dt_print_hex(fp, addr, size); + } else { + /* LINTED - alignment */ + pc = *((uint64_t *)addr); + if (dtrace_lookup_by_addr(pap->pa_dtp, pc, &sym, &dts) != 0) { + dt_print_hex(fp, addr, size); + } else { + (void) fprintf(fp, "%s`%s", dts.dts_object, + dts.dts_name); + } + } +} + +/* + * Print out an array. This is somewhat complex, as we must manually visit + * each member, and recursively invoke ctf_type_visit() for each member. 
If + * the members are non-structs, then we print them out directly: + * + * [ 0x14, 0x2e, 0 ] + * + * If they are structs, then we print out the necessary leading and trailing + * braces, to end up with: + * + * [ + * type { + * ... + * }, + * type { + * ... + * } + * ] + * + * We also use a heuristic to detect whether the array looks like a character + * array. If the encoding indicates it's a character, and we have all + * printable characters followed by a null byte, then we display it as a + * string: + * + * [ "string" ] + */ +static void +dt_print_array(ctf_id_t base, ulong_t off, dt_printarg_t *pap) +{ + FILE *fp = pap->pa_file; + ctf_file_t *ctfp = pap->pa_ctfp; + caddr_t addr = pap->pa_addr + off / NBBY; + ctf_arinfo_t car; + ssize_t eltsize; + ctf_encoding_t e; + int i; + boolean_t isstring; + int kind; + ctf_id_t rtype; + + if (ctf_array_info(ctfp, base, &car) == CTF_ERR) { + (void) fprintf(fp, "0x%p", (void *)addr); + return; + } + + if ((eltsize = ctf_type_size(ctfp, car.ctr_contents)) < 0 || + (rtype = ctf_type_resolve(ctfp, car.ctr_contents)) == CTF_ERR || + (kind = ctf_type_kind(ctfp, rtype)) == CTF_ERR) { + (void) fprintf(fp, "<invalid type %lu>", car.ctr_contents); + return; + } + + /* see if this looks like a string */ + isstring = B_FALSE; + if (kind == CTF_K_INTEGER && + ctf_type_encoding(ctfp, rtype, &e) != CTF_ERR && CTF_IS_CHAR(e)) { + char c; + for (i = 0; i < car.ctr_nelems; i++) { + c = *((char *)addr + eltsize * i); + if (!isprint(c) || c == '\0') + break; + } + + if (i != car.ctr_nelems && c == '\0') + isstring = B_TRUE; + } + + /* + * As a slight aesthetic optimization, if we are a top-level type, then + * don't bother printing out the brackets. This lets print("foo") look + * like: + * + * string "foo" + * + * As D will internally represent this as a char[256] array. 
+ */ + if (!isstring || pap->pa_depth != 0) + (void) fprintf(fp, "[ "); + + if (isstring) + (void) fprintf(fp, "\""); + + for (i = 0; i < car.ctr_nelems; i++) { + if (isstring) { + char c = *((char *)addr + eltsize * i); + if (c == '\0') + break; + (void) fprintf(fp, "%c", c); + } else { + /* + * Recursively invoke ctf_type_visit() on each member. + * We setup a new printarg struct with 'pa_nest' set to + * indicate that we are within a nested array. + */ + dt_printarg_t pa = *pap; + pa.pa_nest += pap->pa_depth + 1; + pa.pa_depth = 0; + pa.pa_addr = addr + eltsize * i; + (void) ctf_type_visit(ctfp, car.ctr_contents, + dt_print_member, &pa); + + dt_print_trailing_braces(&pa, 0); + if (i != car.ctr_nelems - 1) + (void) fprintf(fp, ", "); + else if (CTF_IS_STRUCTLIKE(kind)) + (void) fprintf(fp, "\n"); + } + } + + if (isstring) + (void) fprintf(fp, "\""); + + if (!isstring || pap->pa_depth != 0) { + if (CTF_IS_STRUCTLIKE(kind)) + dt_print_indent(pap); + else + (void) fprintf(fp, " "); + (void) fprintf(fp, "]"); + } +} + +/* + * This isued by both structs and unions to print the leading brace. + */ +/* ARGSUSED */ +static void +dt_print_structlike(ctf_id_t id, ulong_t off, dt_printarg_t *pap) +{ + (void) fprintf(pap->pa_file, "{"); +} + +/* + * For enums, we try to print the enum name, and fall back to the value if it + * can't be determined. We do not do any fancy flag processing like mdb. + */ +/* ARGSUSED */ +static void +dt_print_enum(ctf_id_t base, ulong_t off, dt_printarg_t *pap) +{ + FILE *fp = pap->pa_file; + ctf_file_t *ctfp = pap->pa_ctfp; + const char *ename; + ssize_t size; + caddr_t addr = pap->pa_addr + off / NBBY; + int value = 0; + + /* + * The C standard says that an enum will be at most the sizeof (int). + * But if all the values are less than that, the compiler can use a + * smaller size. Thanks standards. 
+ */ + size = ctf_type_size(ctfp, base); + switch (size) { + case sizeof (uint8_t): + value = *(uint8_t *)addr; + break; + case sizeof (uint16_t): + value = *(uint16_t *)addr; + break; + case sizeof (int32_t): + value = *(int32_t *)addr; + break; + default: + (void) fprintf(fp, "<invalid enum size %u>", (uint_t)size); + return; + } + + if ((ename = ctf_enum_name(ctfp, base, value)) != NULL) + (void) fprintf(fp, "%s", ename); + else + (void) fprintf(fp, "%d", value); +} + +/* + * Forward declaration. There's not much to do here without the complete + * type information, so just print out this fact and drive on. + */ +/* ARGSUSED */ +static void +dt_print_tag(ctf_id_t base, ulong_t off, dt_printarg_t *pap) +{ + (void) fprintf(pap->pa_file, "<forward decl>"); +} + +typedef void dt_printarg_f(ctf_id_t, ulong_t, dt_printarg_t *); + +static dt_printarg_f *const dt_printfuncs[] = { + dt_print_int, /* CTF_K_INTEGER */ + dt_print_float, /* CTF_K_FLOAT */ + dt_print_ptr, /* CTF_K_POINTER */ + dt_print_array, /* CTF_K_ARRAY */ + dt_print_ptr, /* CTF_K_FUNCTION */ + dt_print_structlike, /* CTF_K_STRUCT */ + dt_print_structlike, /* CTF_K_UNION */ + dt_print_enum, /* CTF_K_ENUM */ + dt_print_tag /* CTF_K_FORWARD */ +}; + +/* + * Print one member of a structure. This callback is invoked from + * ctf_type_visit() recursively. + */ +static int +dt_print_member(const char *name, ctf_id_t id, ulong_t off, int depth, + void *data) +{ + char type[DT_TYPE_NAMELEN]; + int kind; + dt_printarg_t *pap = data; + FILE *fp = pap->pa_file; + ctf_file_t *ctfp = pap->pa_ctfp; + boolean_t arraymember; + boolean_t brief; + ctf_encoding_t e; + ctf_id_t rtype; + + dt_print_trailing_braces(pap, depth); + /* + * dt_print_trailing_braces() doesn't include the trailing newline; add + * it here if necessary. 
+ */ + if (depth < pap->pa_depth) + (void) fprintf(fp, "\n"); + pap->pa_depth = depth; + + if ((rtype = ctf_type_resolve(ctfp, id)) == CTF_ERR || + (kind = ctf_type_kind(ctfp, rtype)) == CTF_ERR || + kind < CTF_K_INTEGER || kind > CTF_K_FORWARD) { + dt_print_indent(pap); + (void) fprintf(fp, "%s = <invalid type %lu>", name, id); + return (0); + } + + dt_print_type_name(ctfp, id, type, sizeof (type)); + + arraymember = (pap->pa_nest != 0 && depth == 0); + brief = (arraymember && !CTF_IS_STRUCTLIKE(kind)); + + if (!brief) { + /* + * If this is a direct array member and a struct (otherwise + * brief would be true), then print a trailing newline, as the + * array printing code doesn't include it because it might be a + * simple type. + */ + if (arraymember) + (void) fprintf(fp, "\n"); + dt_print_indent(pap); + + /* always print the type */ + (void) fprintf(fp, "%s", type); + if (name[0] != '\0') { + /* + * For aesthetics, we don't include a space between the + * type name and member name if the type is a pointer. + * This will give us "void *foo =" instead of "void * + * foo =". Unions also have the odd behavior that the + * type name is returned as "union ", with a trailing + * space, so we also avoid printing a space if the type + * name already ends with a space. + */ + if (type[strlen(type) - 1] != '*' && + type[strlen(type) -1] != ' ') { + (void) fprintf(fp, " "); + } + (void) fprintf(fp, "%s", name); + + /* + * If this looks like a bitfield, or is an integer not + * aligned on a byte boundary, print the number of + * bits after the name. 
+ */ + if (kind == CTF_K_INTEGER && + ctf_type_encoding(ctfp, id, &e) == 0) { + ulong_t bits = e.cte_bits; + ulong_t size = bits / NBBY; + + if (bits % NBBY != 0 || + off % NBBY != 0 || + size > 8 || + size != ctf_type_size(ctfp, id)) { + (void) fprintf(fp, " :%lu", bits); + } + } + + (void) fprintf(fp, " ="); + } + (void) fprintf(fp, " "); + } + + dt_printfuncs[kind - 1](rtype, off, pap); + + /* direct simple array members are not separated by newlines */ + if (!brief) + (void) fprintf(fp, "\n"); + + return (0); +} + +/* + * Main print function invoked by dt_consume_cpu(). + */ +int +dtrace_print(dtrace_hdl_t *dtp, FILE *fp, const char *typename, + caddr_t addr, size_t len) +{ + const char *s; + char *object; + dt_printarg_t pa; + ctf_id_t id; + dt_module_t *dmp; + ctf_file_t *ctfp; + int libid; + + /* + * Split the fully-qualified type ID (module`id). This should + * always be the format, but if for some reason we don't find the + * expected value, return 0 to fall back to the generic trace() + * behavior. In the case of userland CTF modules this will actually be + * of the format (module`lib`id). This is due to the fact that those + * modules have multiple CTF containers which `lib` identifies. + */ + for (s = typename; *s != '\0' && *s != '`'; s++) + ; + + if (*s != '`') + return (0); + + object = alloca(s - typename + 1); + bcopy(typename, object, s - typename); + object[s - typename] = '\0'; + dmp = dt_module_lookup_by_name(dtp, object); + if (dmp == NULL) + return (0); + + if (dmp->dm_pid != 0) { + libid = atoi(s + 1); + s = strchr(s + 1, '`'); + if (s == NULL || libid > dmp->dm_nctflibs) + return (0); + ctfp = dmp->dm_libctfp[libid]; + } else { + ctfp = dt_module_getctf(dtp, dmp); + } + + id = atoi(s + 1); + + /* + * Try to get the CTF kind for this id. If something has gone horribly + * wrong and we can't resolve the ID, bail out and let trace() do the + * work. 
+ */ + if (ctfp == NULL || ctf_type_kind(ctfp, id) == CTF_ERR) + return (0); + + /* setup the print structure and kick off the main print routine */ + pa.pa_dtp = dtp; + pa.pa_addr = addr; + pa.pa_ctfp = ctfp; + pa.pa_nest = 0; + pa.pa_depth = 0; + pa.pa_file = fp; + (void) ctf_type_visit(pa.pa_ctfp, id, dt_print_member, &pa); + + dt_print_trailing_braces(&pa, 0); + + return (len); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c new file mode 100644 index 0000000..eeb8735 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.c @@ -0,0 +1,2070 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +#if defined(sun) +#include <sys/sysmacros.h> +#else +#define ABS(a) ((a) < 0 ? 
-(a) : (a)) +#endif +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <sys/socket.h> +#include <netdb.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <arpa/nameser.h> + +#include <dt_printf.h> +#include <dt_string.h> +#include <dt_impl.h> + +/*ARGSUSED*/ +static int +pfcheck_addr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_pointer(dnp) || dt_node_is_integer(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_kaddr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_pointer(dnp) || dt_node_is_integer(dnp) || + dt_node_is_symaddr(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_uaddr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + dtrace_hdl_t *dtp = pfv->pfv_dtp; + dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target"); + + if (dt_node_is_usymaddr(dnp)) + return (1); + + if (idp == NULL || idp->di_id == 0) + return (0); + + return (dt_node_is_pointer(dnp) || dt_node_is_integer(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_stack(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_stack(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_time(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_integer(dnp) && + dt_node_type_size(dnp) == sizeof (uint64_t)); +} + +/*ARGSUSED*/ +static int +pfcheck_str(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + ctf_file_t *ctfp; + ctf_encoding_t e; + ctf_arinfo_t r; + ctf_id_t base; + uint_t kind; + + if (dt_node_is_string(dnp)) + return (1); + + ctfp = dnp->dn_ctfp; + base = ctf_type_resolve(ctfp, dnp->dn_type); + kind = ctf_type_kind(ctfp, base); + + return (kind == CTF_K_ARRAY && ctf_array_info(ctfp, base, &r) == 0 && + (base = ctf_type_resolve(ctfp, r.ctr_contents)) != CTF_ERR && + ctf_type_encoding(ctfp, base, &e) == 0 && IS_CHAR(e)); +} + 
+/*ARGSUSED*/ +static int +pfcheck_wstr(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + ctf_id_t base = ctf_type_resolve(ctfp, dnp->dn_type); + uint_t kind = ctf_type_kind(ctfp, base); + + ctf_encoding_t e; + ctf_arinfo_t r; + + return (kind == CTF_K_ARRAY && ctf_array_info(ctfp, base, &r) == 0 && + (base = ctf_type_resolve(ctfp, r.ctr_contents)) != CTF_ERR && + ctf_type_kind(ctfp, base) == CTF_K_INTEGER && + ctf_type_encoding(ctfp, base, &e) == 0 && e.cte_bits == 32); +} + +/*ARGSUSED*/ +static int +pfcheck_csi(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_integer(dnp) && + dt_node_type_size(dnp) <= sizeof (int)); +} + +/*ARGSUSED*/ +static int +pfcheck_fp(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_float(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_xint(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (dt_node_is_integer(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_dint(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + if (dnp->dn_flags & DT_NF_SIGNED) + pfd->pfd_fmt[strlen(pfd->pfd_fmt) - 1] = 'i'; + else + pfd->pfd_fmt[strlen(pfd->pfd_fmt) - 1] = 'u'; + + return (dt_node_is_integer(dnp)); +} + +/*ARGSUSED*/ +static int +pfcheck_xshort(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + ctf_id_t type = ctf_type_resolve(ctfp, dnp->dn_type); + char n[DT_TYPE_NAMELEN]; + + return (ctf_type_name(ctfp, type, n, sizeof (n)) != NULL && ( + strcmp(n, "short") == 0 || strcmp(n, "signed short") == 0 || + strcmp(n, "unsigned short") == 0)); +} + +/*ARGSUSED*/ +static int +pfcheck_xlong(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + ctf_id_t type = ctf_type_resolve(ctfp, dnp->dn_type); + char n[DT_TYPE_NAMELEN]; + + return (ctf_type_name(ctfp, type, n, sizeof (n)) != NULL && ( + strcmp(n, "long") == 0 || strcmp(n, "signed long") == 0 || + strcmp(n, "unsigned 
long") == 0)); +} + +/*ARGSUSED*/ +static int +pfcheck_xlonglong(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + ctf_file_t *ctfp = dnp->dn_ctfp; + ctf_id_t type = dnp->dn_type; + char n[DT_TYPE_NAMELEN]; + + if (ctf_type_name(ctfp, ctf_type_resolve(ctfp, type), n, + sizeof (n)) != NULL && (strcmp(n, "long long") == 0 || + strcmp(n, "signed long long") == 0 || + strcmp(n, "unsigned long long") == 0)) + return (1); + + /* + * If the type used for %llx or %llX is not an [unsigned] long long, we + * also permit it to be a [u]int64_t or any typedef thereof. We know + * that these typedefs are guaranteed to work with %ll[xX] in either + * compilation environment even though they alias to "long" in LP64. + */ + while (ctf_type_kind(ctfp, type) == CTF_K_TYPEDEF) { + if (ctf_type_name(ctfp, type, n, sizeof (n)) != NULL && + (strcmp(n, "int64_t") == 0 || strcmp(n, "uint64_t") == 0)) + return (1); + + type = ctf_type_reference(ctfp, type); + } + + return (0); +} + +/*ARGSUSED*/ +static int +pfcheck_type(dt_pfargv_t *pfv, dt_pfargd_t *pfd, dt_node_t *dnp) +{ + return (ctf_type_compat(dnp->dn_ctfp, ctf_type_resolve(dnp->dn_ctfp, + dnp->dn_type), pfd->pfd_conv->pfc_dctfp, pfd->pfd_conv->pfc_dtype)); +} + +/*ARGSUSED*/ +static int +pfprint_sint(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t unormal) +{ + int64_t normal = (int64_t)unormal; + int32_t n = (int32_t)normal; + + switch (size) { + case sizeof (int8_t): + return (dt_printf(dtp, fp, format, + (int32_t)*((int8_t *)addr) / n)); + case sizeof (int16_t): + return (dt_printf(dtp, fp, format, + (int32_t)*((int16_t *)addr) / n)); + case sizeof (int32_t): + return (dt_printf(dtp, fp, format, + *((int32_t *)addr) / n)); + case sizeof (int64_t): + return (dt_printf(dtp, fp, format, + *((int64_t *)addr) / normal)); + default: + return (dt_set_errno(dtp, EDT_DMISMATCH)); + } +} + +/*ARGSUSED*/ +static int +pfprint_uint(dtrace_hdl_t *dtp, FILE *fp, const 
char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + uint32_t n = (uint32_t)normal; + + switch (size) { + case sizeof (uint8_t): + return (dt_printf(dtp, fp, format, + (uint32_t)*((uint8_t *)addr) / n)); + case sizeof (uint16_t): + return (dt_printf(dtp, fp, format, + (uint32_t)*((uint16_t *)addr) / n)); + case sizeof (uint32_t): + return (dt_printf(dtp, fp, format, + *((uint32_t *)addr) / n)); + case sizeof (uint64_t): + return (dt_printf(dtp, fp, format, + *((uint64_t *)addr) / normal)); + default: + return (dt_set_errno(dtp, EDT_DMISMATCH)); + } +} + +static int +pfprint_dint(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + if (pfd->pfd_flags & DT_PFCONV_SIGNED) + return (pfprint_sint(dtp, fp, format, pfd, addr, size, normal)); + else + return (pfprint_uint(dtp, fp, format, pfd, addr, size, normal)); +} + +/*ARGSUSED*/ +static int +pfprint_fp(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + double n = (double)normal; + long double ldn = (long double)normal; + + switch (size) { + case sizeof (float): + return (dt_printf(dtp, fp, format, + (double)*((float *)addr) / n)); + case sizeof (double): + return (dt_printf(dtp, fp, format, + *((double *)addr) / n)); +#if !defined(__arm__) && !defined(__powerpc__) && !defined(__mips__) + case sizeof (long double): + return (dt_printf(dtp, fp, format, + *((long double *)addr) / ldn)); +#endif + default: + return (dt_set_errno(dtp, EDT_DMISMATCH)); + } +} + +/*ARGSUSED*/ +static int +pfprint_addr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char *s; + int n, len = 256; + uint64_t val; + + switch (size) { + case sizeof (uint32_t): + val = *((uint32_t *)addr); + break; + case sizeof (uint64_t): + val = *((uint64_t *)addr); + break; + default: + return 
(dt_set_errno(dtp, EDT_DMISMATCH)); + } + + do { + n = len; + s = alloca(n); + } while ((len = dtrace_addr2str(dtp, val, s, n)) > n); + + return (dt_printf(dtp, fp, format, s)); +} + +/*ARGSUSED*/ +static int +pfprint_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + return (dt_print_mod(dtp, fp, format, (caddr_t)addr)); +} + +/*ARGSUSED*/ +static int +pfprint_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + return (dt_print_umod(dtp, fp, format, (caddr_t)addr)); +} + +/*ARGSUSED*/ +static int +pfprint_uaddr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char *s; + int n, len = 256; + uint64_t val, pid = 0; + + dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target"); + + switch (size) { + case sizeof (uint32_t): + val = (u_longlong_t)*((uint32_t *)addr); + break; + case sizeof (uint64_t): + val = (u_longlong_t)*((uint64_t *)addr); + break; + case sizeof (uint64_t) * 2: + pid = ((uint64_t *)(uintptr_t)addr)[0]; + val = ((uint64_t *)(uintptr_t)addr)[1]; + break; + default: + return (dt_set_errno(dtp, EDT_DMISMATCH)); + } + + if (pid == 0 && dtp->dt_vector == NULL && idp != NULL) + pid = idp->di_id; + + do { + n = len; + s = alloca(n); + } while ((len = dtrace_uaddr2str(dtp, pid, val, s, n)) > n); + + return (dt_printf(dtp, fp, format, s)); +} + +/*ARGSUSED*/ +static int +pfprint_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *vaddr, size_t size, uint64_t normal) +{ + int width; + dtrace_optval_t saved = dtp->dt_options[DTRACEOPT_STACKINDENT]; + const dtrace_recdesc_t *rec = pfd->pfd_rec; + caddr_t addr = (caddr_t)vaddr; + int err = 0; + + /* + * We have stashed the value of the STACKINDENT option, and we will + * now override it for the purposes of formatting the stack. 
If the + * field has been specified as left-aligned (i.e. (%-#), we set the + * indentation to be the width. This is a slightly odd semantic, but + * it's useful functionality -- and it's slightly odd to begin with to + * be using a single format specifier to be formatting multiple lines + * of text... + */ + if (pfd->pfd_dynwidth < 0) { + assert(pfd->pfd_flags & DT_PFCONV_DYNWIDTH); + width = -pfd->pfd_dynwidth; + } else if (pfd->pfd_flags & DT_PFCONV_LEFT) { + width = pfd->pfd_dynwidth ? pfd->pfd_dynwidth : pfd->pfd_width; + } else { + width = 0; + } + + dtp->dt_options[DTRACEOPT_STACKINDENT] = width; + + switch (rec->dtrd_action) { + case DTRACEACT_USTACK: + case DTRACEACT_JSTACK: + err = dt_print_ustack(dtp, fp, format, addr, rec->dtrd_arg); + break; + + case DTRACEACT_STACK: + err = dt_print_stack(dtp, fp, format, addr, rec->dtrd_arg, + rec->dtrd_size / rec->dtrd_arg); + break; + + default: + assert(0); + } + + dtp->dt_options[DTRACEOPT_STACKINDENT] = saved; + + return (err); +} + +/*ARGSUSED*/ +static int +pfprint_time(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char src[32], buf[32], *dst = buf; + hrtime_t time = *((uint64_t *)addr); + time_t sec = (time_t)(time / NANOSEC); + int i; + + /* + * ctime(3C) returns a string of the form "Dec 3 17:20:00 1973\n\0". + * Below, we turn this into the canonical adb/mdb /[yY] format, + * "1973 Dec 3 17:20:00". + */ +#if defined(sun) + (void) ctime_r(&sec, src, sizeof (src)); +#else + (void) ctime_r(&sec, src); +#endif + + /* + * Place the 4-digit year at the head of the string... + */ + for (i = 20; i < 24; i++) + *dst++ = src[i]; + + /* + * ...and follow it with the remainder (month, day, hh:mm:ss). + */ + for (i = 3; i < 19; i++) + *dst++ = src[i]; + + *dst = '\0'; + return (dt_printf(dtp, fp, format, buf)); +} + +/* + * This prints the time in RFC 822 standard form. 
This is useful for emitting + * notions of time that are consumed by standard tools (e.g., as part of an + * RSS feed). + */ +/*ARGSUSED*/ +static int +pfprint_time822(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + hrtime_t time = *((uint64_t *)addr); + time_t sec = (time_t)(time / NANOSEC); + struct tm tm; + char buf[64]; + + (void) localtime_r(&sec, &tm); + (void) strftime(buf, sizeof (buf), "%a, %d %b %G %T %Z", &tm); + return (dt_printf(dtp, fp, format, buf)); +} + +/*ARGSUSED*/ +static int +pfprint_port(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + uint16_t port = htons(*((uint16_t *)addr)); + char buf[256]; + struct servent *sv, res; + +#if defined(sun) + if ((sv = getservbyport_r(port, NULL, &res, buf, sizeof (buf))) != NULL) +#else + if (getservbyport_r(port, NULL, &res, buf, sizeof (buf), &sv) > 0) +#endif + return (dt_printf(dtp, fp, format, sv->s_name)); + + (void) snprintf(buf, sizeof (buf), "%d", *((uint16_t *)addr)); + return (dt_printf(dtp, fp, format, buf)); +} + +/*ARGSUSED*/ +static int +pfprint_inetaddr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char *s = alloca(size + 1); + struct hostent *host, res; + char inetaddr[NS_IN6ADDRSZ]; + char buf[1024]; + int e; + + bcopy(addr, s, size); + s[size] = '\0'; + + if (strchr(s, ':') == NULL && inet_pton(AF_INET, s, inetaddr) != -1) { +#if defined(sun) + if ((host = gethostbyaddr_r(inetaddr, NS_INADDRSZ, + AF_INET, &res, buf, sizeof (buf), &e)) != NULL) +#else + if (gethostbyaddr_r(inetaddr, NS_INADDRSZ, + AF_INET, &res, buf, sizeof (buf), &host, &e) > 0) +#endif + return (dt_printf(dtp, fp, format, host->h_name)); + } else if (inet_pton(AF_INET6, s, inetaddr) != -1) { + if ((host = getipnodebyaddr(inetaddr, NS_IN6ADDRSZ, + AF_INET6, &e)) != NULL) + return 
(dt_printf(dtp, fp, format, host->h_name)); + } + + return (dt_printf(dtp, fp, format, s)); +} + +/*ARGSUSED*/ +static int +pfprint_cstr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char *s = alloca(size + 1); + + bcopy(addr, s, size); + s[size] = '\0'; + return (dt_printf(dtp, fp, format, s)); +} + +/*ARGSUSED*/ +static int +pfprint_wstr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + wchar_t *ws = alloca(size + sizeof (wchar_t)); + + bcopy(addr, ws, size); + ws[size / sizeof (wchar_t)] = L'\0'; + return (dt_printf(dtp, fp, format, ws)); +} + +/*ARGSUSED*/ +static int +pfprint_estr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char *s; + int n; + + if ((s = strchr2esc(addr, size)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + n = dt_printf(dtp, fp, format, s); + free(s); + return (n); +} + +static int +pfprint_echr(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + char c; + + switch (size) { + case sizeof (int8_t): + c = *(int8_t *)addr; + break; + case sizeof (int16_t): + c = *(int16_t *)addr; + break; + case sizeof (int32_t): + c = *(int32_t *)addr; + break; + default: + return (dt_set_errno(dtp, EDT_DMISMATCH)); + } + + return (pfprint_estr(dtp, fp, format, pfd, &c, 1, normal)); +} + +/*ARGSUSED*/ +static int +pfprint_pct(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + return (dt_printf(dtp, fp, "%%")); +} + +static const char pfproto_xint[] = "char, short, int, long, or long long"; +static const char pfproto_csi[] = "char, short, or int"; +static const char pfproto_fp[] = "float, double, or long double"; +static const char pfproto_addr[] = "pointer or 
integer"; +static const char pfproto_uaddr[] = + "pointer or integer (with -p/-c) or _usymaddr (without -p/-c)"; +static const char pfproto_cstr[] = "char [] or string (or use stringof)"; +static const char pfproto_wstr[] = "wchar_t []"; + +/* + * Printf format conversion dictionary. This table should match the set of + * conversions offered by printf(3C), as well as some additional extensions. + * The second parameter is an ASCII string which is either an actual type + * name we should look up (if pfcheck_type is specified), or just a descriptive + * string of the types expected for use in error messages. + */ +static const dt_pfconv_t _dtrace_conversions[] = { +{ "a", "s", pfproto_addr, pfcheck_kaddr, pfprint_addr }, +{ "A", "s", pfproto_uaddr, pfcheck_uaddr, pfprint_uaddr }, +{ "c", "c", pfproto_csi, pfcheck_csi, pfprint_sint }, +{ "C", "s", pfproto_csi, pfcheck_csi, pfprint_echr }, +{ "d", "d", pfproto_xint, pfcheck_dint, pfprint_dint }, +{ "e", "e", pfproto_fp, pfcheck_fp, pfprint_fp }, +{ "E", "E", pfproto_fp, pfcheck_fp, pfprint_fp }, +{ "f", "f", pfproto_fp, pfcheck_fp, pfprint_fp }, +{ "g", "g", pfproto_fp, pfcheck_fp, pfprint_fp }, +{ "G", "G", pfproto_fp, pfcheck_fp, pfprint_fp }, +{ "hd", "d", "short", pfcheck_type, pfprint_sint }, +{ "hi", "i", "short", pfcheck_type, pfprint_sint }, +{ "ho", "o", "unsigned short", pfcheck_type, pfprint_uint }, +{ "hu", "u", "unsigned short", pfcheck_type, pfprint_uint }, +{ "hx", "x", "short", pfcheck_xshort, pfprint_uint }, +{ "hX", "X", "short", pfcheck_xshort, pfprint_uint }, +{ "i", "i", pfproto_xint, pfcheck_xint, pfprint_sint }, +{ "I", "s", pfproto_cstr, pfcheck_str, pfprint_inetaddr }, +{ "k", "s", "stack", pfcheck_stack, pfprint_stack }, +{ "lc", "lc", "int", pfcheck_type, pfprint_sint }, /* a.k.a. 
wint_t */ +{ "ld", "d", "long", pfcheck_type, pfprint_sint }, +{ "li", "i", "long", pfcheck_type, pfprint_sint }, +{ "lo", "o", "unsigned long", pfcheck_type, pfprint_uint }, +{ "lu", "u", "unsigned long", pfcheck_type, pfprint_uint }, +{ "ls", "ls", pfproto_wstr, pfcheck_wstr, pfprint_wstr }, +{ "lx", "x", "long", pfcheck_xlong, pfprint_uint }, +{ "lX", "X", "long", pfcheck_xlong, pfprint_uint }, +{ "lld", "d", "long long", pfcheck_type, pfprint_sint }, +{ "lli", "i", "long long", pfcheck_type, pfprint_sint }, +{ "llo", "o", "unsigned long long", pfcheck_type, pfprint_uint }, +{ "llu", "u", "unsigned long long", pfcheck_type, pfprint_uint }, +{ "llx", "x", "long long", pfcheck_xlonglong, pfprint_uint }, +{ "llX", "X", "long long", pfcheck_xlonglong, pfprint_uint }, +{ "Le", "e", "long double", pfcheck_type, pfprint_fp }, +{ "LE", "E", "long double", pfcheck_type, pfprint_fp }, +{ "Lf", "f", "long double", pfcheck_type, pfprint_fp }, +{ "Lg", "g", "long double", pfcheck_type, pfprint_fp }, +{ "LG", "G", "long double", pfcheck_type, pfprint_fp }, +{ "o", "o", pfproto_xint, pfcheck_xint, pfprint_uint }, +{ "p", "x", pfproto_addr, pfcheck_addr, pfprint_uint }, +{ "P", "s", "uint16_t", pfcheck_type, pfprint_port }, +{ "s", "s", "char [] or string (or use stringof)", pfcheck_str, pfprint_cstr }, +{ "S", "s", pfproto_cstr, pfcheck_str, pfprint_estr }, +{ "T", "s", "int64_t", pfcheck_time, pfprint_time822 }, +{ "u", "u", pfproto_xint, pfcheck_xint, pfprint_uint }, +#if defined(sun) +{ "wc", "wc", "int", pfcheck_type, pfprint_sint }, /* a.k.a. wchar_t */ +{ "ws", "ws", pfproto_wstr, pfcheck_wstr, pfprint_wstr }, +#else +{ "wc", "lc", "int", pfcheck_type, pfprint_sint }, /* a.k.a. 
wchar_t */ +{ "ws", "ls", pfproto_wstr, pfcheck_wstr, pfprint_wstr }, +#endif +{ "x", "x", pfproto_xint, pfcheck_xint, pfprint_uint }, +{ "X", "X", pfproto_xint, pfcheck_xint, pfprint_uint }, +{ "Y", "s", "int64_t", pfcheck_time, pfprint_time }, +{ "%", "%", "void", pfcheck_type, pfprint_pct }, +{ NULL, NULL, NULL, NULL, NULL } +}; + +int +dt_pfdict_create(dtrace_hdl_t *dtp) +{ + uint_t n = _dtrace_strbuckets; + const dt_pfconv_t *pfd; + dt_pfdict_t *pdi; + + if ((pdi = malloc(sizeof (dt_pfdict_t))) == NULL || + (pdi->pdi_buckets = malloc(sizeof (dt_pfconv_t *) * n)) == NULL) { + free(pdi); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + dtp->dt_pfdict = pdi; + bzero(pdi->pdi_buckets, sizeof (dt_pfconv_t *) * n); + pdi->pdi_nbuckets = n; + + for (pfd = _dtrace_conversions; pfd->pfc_name != NULL; pfd++) { + dtrace_typeinfo_t dtt; + dt_pfconv_t *pfc; + uint_t h; + + if ((pfc = malloc(sizeof (dt_pfconv_t))) == NULL) { + dt_pfdict_destroy(dtp); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + bcopy(pfd, pfc, sizeof (dt_pfconv_t)); + h = dt_strtab_hash(pfc->pfc_name, NULL) % n; + pfc->pfc_next = pdi->pdi_buckets[h]; + pdi->pdi_buckets[h] = pfc; + + dtt.dtt_ctfp = NULL; + dtt.dtt_type = CTF_ERR; + + /* + * The "D" container or its parent must contain a definition of + * any type referenced by a printf conversion. If none can be + * found, we fail to initialize the printf dictionary. + */ + if (pfc->pfc_check == &pfcheck_type && dtrace_lookup_by_type( + dtp, DTRACE_OBJ_DDEFS, pfc->pfc_tstr, &dtt) != 0) { + dt_pfdict_destroy(dtp); + return (dt_set_errno(dtp, EDT_NOCONV)); + } + + pfc->pfc_dctfp = dtt.dtt_ctfp; + pfc->pfc_dtype = dtt.dtt_type; + + /* + * The "C" container may contain an alternate definition of an + * explicit conversion type. If it does, use it; otherwise + * just set pfc_ctype to pfc_dtype so it is always valid. 
+ */ + if (pfc->pfc_check == &pfcheck_type && dtrace_lookup_by_type( + dtp, DTRACE_OBJ_CDEFS, pfc->pfc_tstr, &dtt) == 0) { + pfc->pfc_cctfp = dtt.dtt_ctfp; + pfc->pfc_ctype = dtt.dtt_type; + } else { + pfc->pfc_cctfp = pfc->pfc_dctfp; + pfc->pfc_ctype = pfc->pfc_dtype; + } + + if (pfc->pfc_check == NULL || pfc->pfc_print == NULL || + pfc->pfc_ofmt == NULL || pfc->pfc_tstr == NULL) { + dt_pfdict_destroy(dtp); + return (dt_set_errno(dtp, EDT_BADCONV)); + } + + dt_dprintf("loaded printf conversion %%%s\n", pfc->pfc_name); + } + + return (0); +} + +void +dt_pfdict_destroy(dtrace_hdl_t *dtp) +{ + dt_pfdict_t *pdi = dtp->dt_pfdict; + dt_pfconv_t *pfc, *nfc; + uint_t i; + + if (pdi == NULL) + return; + + for (i = 0; i < pdi->pdi_nbuckets; i++) { + for (pfc = pdi->pdi_buckets[i]; pfc != NULL; pfc = nfc) { + nfc = pfc->pfc_next; + free(pfc); + } + } + + free(pdi->pdi_buckets); + free(pdi); + dtp->dt_pfdict = NULL; +} + +static const dt_pfconv_t * +dt_pfdict_lookup(dtrace_hdl_t *dtp, const char *name) +{ + dt_pfdict_t *pdi = dtp->dt_pfdict; + uint_t h = dt_strtab_hash(name, NULL) % pdi->pdi_nbuckets; + const dt_pfconv_t *pfc; + + for (pfc = pdi->pdi_buckets[h]; pfc != NULL; pfc = pfc->pfc_next) { + if (strcmp(pfc->pfc_name, name) == 0) + break; + } + + return (pfc); +} + +static dt_pfargv_t * +dt_printf_error(dtrace_hdl_t *dtp, int err) +{ + if (yypcb != NULL) + longjmp(yypcb->pcb_jmpbuf, err); + + (void) dt_set_errno(dtp, err); + return (NULL); +} + +dt_pfargv_t * +dt_printf_create(dtrace_hdl_t *dtp, const char *s) +{ + dt_pfargd_t *pfd, *nfd = NULL; + dt_pfargv_t *pfv; + const char *p, *q; + char *format; + + if ((pfv = malloc(sizeof (dt_pfargv_t))) == NULL || + (format = strdup(s)) == NULL) { + free(pfv); + return (dt_printf_error(dtp, EDT_NOMEM)); + } + + pfv->pfv_format = format; + pfv->pfv_argv = NULL; + pfv->pfv_argc = 0; + pfv->pfv_flags = 0; + pfv->pfv_dtp = dtp; + + for (q = format; (p = strchr(q, '%')) != NULL; q = *p ? 
p + 1 : p) { + uint_t namelen = 0; + int digits = 0; + int dot = 0; + + char name[8]; + char c; + int n; + + if ((pfd = malloc(sizeof (dt_pfargd_t))) == NULL) { + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_NOMEM)); + } + + if (pfv->pfv_argv != NULL) + nfd->pfd_next = pfd; + else + pfv->pfv_argv = pfd; + + bzero(pfd, sizeof (dt_pfargd_t)); + pfv->pfv_argc++; + nfd = pfd; + + if (p > q) { + pfd->pfd_preflen = (size_t)(p - q); + pfd->pfd_prefix = q; + } + + fmt_switch: + switch (c = *++p) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (dot == 0 && digits == 0 && c == '0') { + pfd->pfd_flags |= DT_PFCONV_ZPAD; + pfd->pfd_flags &= ~DT_PFCONV_LEFT; + goto fmt_switch; + } + + for (n = 0; isdigit(c); c = *++p) + n = n * 10 + c - '0'; + + if (dot) + pfd->pfd_prec = n; + else + pfd->pfd_width = n; + + p--; + digits++; + goto fmt_switch; + + case '#': + pfd->pfd_flags |= DT_PFCONV_ALT; + goto fmt_switch; + + case '*': + n = dot ? DT_PFCONV_DYNPREC : DT_PFCONV_DYNWIDTH; + + if (pfd->pfd_flags & n) { + yywarn("format conversion #%u has more than " + "one '*' specified for the output %s\n", + pfv->pfv_argc, n ? "precision" : "width"); + + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_COMPILER)); + } + + pfd->pfd_flags |= n; + goto fmt_switch; + + case '+': + pfd->pfd_flags |= DT_PFCONV_SPOS; + goto fmt_switch; + + case '-': + pfd->pfd_flags |= DT_PFCONV_LEFT; + pfd->pfd_flags &= ~DT_PFCONV_ZPAD; + goto fmt_switch; + + case '.': + if (dot++ != 0) { + yywarn("format conversion #%u has more than " + "one '.' 
specified\n", pfv->pfv_argc); + + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_COMPILER)); + } + digits = 0; + goto fmt_switch; + + case '?': + if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) + pfd->pfd_width = 16; + else + pfd->pfd_width = 8; + goto fmt_switch; + + case '@': + pfd->pfd_flags |= DT_PFCONV_AGG; + goto fmt_switch; + + case '\'': + pfd->pfd_flags |= DT_PFCONV_GROUP; + goto fmt_switch; + + case ' ': + pfd->pfd_flags |= DT_PFCONV_SPACE; + goto fmt_switch; + + case '$': + yywarn("format conversion #%u uses unsupported " + "positional format (%%n$)\n", pfv->pfv_argc); + + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_COMPILER)); + + case '%': + if (p[-1] == '%') + goto default_lbl; /* if %% then use "%" conv */ + + yywarn("format conversion #%u cannot be combined " + "with other format flags: %%%%\n", pfv->pfv_argc); + + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_COMPILER)); + + case '\0': + yywarn("format conversion #%u name expected before " + "end of format string\n", pfv->pfv_argc); + + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_COMPILER)); + + case 'h': + case 'l': + case 'L': + case 'w': + if (namelen < sizeof (name) - 2) + name[namelen++] = c; + goto fmt_switch; + + default_lbl: + default: + name[namelen++] = c; + name[namelen] = '\0'; + } + + pfd->pfd_conv = dt_pfdict_lookup(dtp, name); + + if (pfd->pfd_conv == NULL) { + yywarn("format conversion #%u is undefined: %%%s\n", + pfv->pfv_argc, name); + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_COMPILER)); + } + } + + if (*q != '\0' || *format == '\0') { + if ((pfd = malloc(sizeof (dt_pfargd_t))) == NULL) { + dt_printf_destroy(pfv); + return (dt_printf_error(dtp, EDT_NOMEM)); + } + + if (pfv->pfv_argv != NULL) + nfd->pfd_next = pfd; + else + pfv->pfv_argv = pfd; + + bzero(pfd, sizeof (dt_pfargd_t)); + pfv->pfv_argc++; + + pfd->pfd_prefix = q; + pfd->pfd_preflen = strlen(q); + } + + return (pfv); +} + +void 
+dt_printf_destroy(dt_pfargv_t *pfv) +{ + dt_pfargd_t *pfd, *nfd; + + for (pfd = pfv->pfv_argv; pfd != NULL; pfd = nfd) { + nfd = pfd->pfd_next; + free(pfd); + } + + free(pfv->pfv_format); + free(pfv); +} + +void +dt_printf_validate(dt_pfargv_t *pfv, uint_t flags, + dt_ident_t *idp, int foff, dtrace_actkind_t kind, dt_node_t *dnp) +{ + dt_pfargd_t *pfd = pfv->pfv_argv; + const char *func = idp->di_name; + + char n[DT_TYPE_NAMELEN]; + dtrace_typeinfo_t dtt; + const char *aggtype; + dt_node_t aggnode; + int i, j; + + if (pfv->pfv_format[0] == '\0') { + xyerror(D_PRINTF_FMT_EMPTY, + "%s( ) format string is empty\n", func); + } + + pfv->pfv_flags = flags; + + /* + * We fake up a parse node representing the type that can be used with + * an aggregation result conversion, which -- for all but count() -- + * is a signed quantity. + */ + if (kind != DTRACEAGG_COUNT) + aggtype = "int64_t"; + else + aggtype = "uint64_t"; + + if (dt_type_lookup(aggtype, &dtt) != 0) + xyerror(D_TYPE_ERR, "failed to lookup agg type %s\n", aggtype); + + bzero(&aggnode, sizeof (aggnode)); + dt_node_type_assign(&aggnode, dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); + + for (i = 0, j = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { + const dt_pfconv_t *pfc = pfd->pfd_conv; + const char *dyns[2]; + int dync = 0; + + char vname[64]; + dt_node_t *vnp; + + if (pfc == NULL) + continue; /* no checking if argd is just a prefix */ + + if (pfc->pfc_print == &pfprint_pct) { + (void) strcat(pfd->pfd_fmt, pfc->pfc_ofmt); + continue; + } + + if (pfd->pfd_flags & DT_PFCONV_DYNPREC) + dyns[dync++] = ".*"; + if (pfd->pfd_flags & DT_PFCONV_DYNWIDTH) + dyns[dync++] = "*"; + + for (; dync != 0; dync--) { + if (dnp == NULL) { + xyerror(D_PRINTF_DYN_PROTO, + "%s( ) prototype mismatch: conversion " + "#%d (%%%s) is missing a corresponding " + "\"%s\" argument\n", func, i + 1, + pfc->pfc_name, dyns[dync - 1]); + } + + if (dt_node_is_integer(dnp) == 0) { + xyerror(D_PRINTF_DYN_TYPE, + "%s( ) argument #%d is incompatible " + 
"with conversion #%d prototype:\n" + "\tconversion: %% %s %s\n" + "\t prototype: int\n\t argument: %s\n", + func, j + foff + 1, i + 1, + dyns[dync - 1], pfc->pfc_name, + dt_node_type_name(dnp, n, sizeof (n))); + } + + dnp = dnp->dn_list; + j++; + } + + /* + * If this conversion is consuming the aggregation data, set + * the value node pointer (vnp) to a fake node based on the + * aggregating function result type. Otherwise assign vnp to + * the next parse node in the argument list, if there is one. + */ + if (pfd->pfd_flags & DT_PFCONV_AGG) { + if (!(flags & DT_PRINTF_AGGREGATION)) { + xyerror(D_PRINTF_AGG_CONV, + "%%@ conversion requires an aggregation" + " and is not for use with %s( )\n", func); + } + (void) strlcpy(vname, "aggregating action", + sizeof (vname)); + vnp = &aggnode; + } else if (dnp == NULL) { + xyerror(D_PRINTF_ARG_PROTO, + "%s( ) prototype mismatch: conversion #%d (%%" + "%s) is missing a corresponding value argument\n", + func, i + 1, pfc->pfc_name); + } else { + (void) snprintf(vname, sizeof (vname), + "argument #%d", j + foff + 1); + vnp = dnp; + dnp = dnp->dn_list; + j++; + } + + /* + * Fill in the proposed final format string by prepending any + * size-related prefixes to the pfconv's format string. The + * pfc_check() function below may optionally modify the format + * as part of validating the type of the input argument. + */ + if (pfc->pfc_print == &pfprint_sint || + pfc->pfc_print == &pfprint_uint || + pfc->pfc_print == &pfprint_dint) { + if (dt_node_type_size(vnp) == sizeof (uint64_t)) + (void) strcpy(pfd->pfd_fmt, "ll"); + } else if (pfc->pfc_print == &pfprint_fp) { + if (dt_node_type_size(vnp) == sizeof (long double)) + (void) strcpy(pfd->pfd_fmt, "L"); + } + + (void) strcat(pfd->pfd_fmt, pfc->pfc_ofmt); + + /* + * Validate the format conversion against the value node type. 
+ * If the conversion is good, create the descriptor format + * string by concatenating together any required printf(3C) + * size prefixes with the conversion's native format string. + */ + if (pfc->pfc_check(pfv, pfd, vnp) == 0) { + xyerror(D_PRINTF_ARG_TYPE, + "%s( ) %s is incompatible with " + "conversion #%d prototype:\n\tconversion: %%%s\n" + "\t prototype: %s\n\t argument: %s\n", func, + vname, i + 1, pfc->pfc_name, pfc->pfc_tstr, + dt_node_type_name(vnp, n, sizeof (n))); + } + } + + if ((flags & DT_PRINTF_EXACTLEN) && dnp != NULL) { + xyerror(D_PRINTF_ARG_EXTRA, + "%s( ) prototype mismatch: only %d arguments " + "required by this format string\n", func, j); + } +} + +void +dt_printa_validate(dt_node_t *lhs, dt_node_t *rhs) +{ + dt_ident_t *lid, *rid; + dt_node_t *lproto, *rproto; + int largc, rargc, argn; + char n1[DT_TYPE_NAMELEN]; + char n2[DT_TYPE_NAMELEN]; + + assert(lhs->dn_kind == DT_NODE_AGG); + assert(rhs->dn_kind == DT_NODE_AGG); + + lid = lhs->dn_ident; + rid = rhs->dn_ident; + + lproto = ((dt_idsig_t *)lid->di_data)->dis_args; + rproto = ((dt_idsig_t *)rid->di_data)->dis_args; + + /* + * First, get an argument count on each side. These must match. + */ + for (largc = 0; lproto != NULL; lproto = lproto->dn_list) + largc++; + + for (rargc = 0; rproto != NULL; rproto = rproto->dn_list) + rargc++; + + if (largc != rargc) { + xyerror(D_PRINTA_AGGKEY, "printa( ): @%s and @%s do not have " + "matching key signatures: @%s has %d key%s, @%s has %d " + "key%s", lid->di_name, rid->di_name, + lid->di_name, largc, largc == 1 ? "" : "s", + rid->di_name, rargc, rargc == 1 ? "" : "s"); + } + + /* + * Now iterate over the keys to verify that each type matches. 
+ */ + lproto = ((dt_idsig_t *)lid->di_data)->dis_args; + rproto = ((dt_idsig_t *)rid->di_data)->dis_args; + + for (argn = 1; lproto != NULL; argn++, lproto = lproto->dn_list, + rproto = rproto->dn_list) { + assert(rproto != NULL); + + if (dt_node_is_argcompat(lproto, rproto)) + continue; + + xyerror(D_PRINTA_AGGPROTO, "printa( ): @%s[ ] key #%d is " + "incompatible with @%s:\n%9s key #%d: %s\n" + "%9s key #%d: %s\n", + rid->di_name, argn, lid->di_name, lid->di_name, argn, + dt_node_type_name(lproto, n1, sizeof (n1)), rid->di_name, + argn, dt_node_type_name(rproto, n2, sizeof (n2))); + } +} + +static int +dt_printf_getint(dtrace_hdl_t *dtp, const dtrace_recdesc_t *recp, + uint_t nrecs, const void *buf, size_t len, int *ip) +{ + uintptr_t addr; + + if (nrecs == 0) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + addr = (uintptr_t)buf + recp->dtrd_offset; + + if (addr + sizeof (int) > (uintptr_t)buf + len) + return (dt_set_errno(dtp, EDT_DOFFSET)); + + if (addr & (recp->dtrd_alignment - 1)) + return (dt_set_errno(dtp, EDT_DALIGN)); + + switch (recp->dtrd_size) { + case sizeof (int8_t): + *ip = (int)*((int8_t *)addr); + break; + case sizeof (int16_t): + *ip = (int)*((int16_t *)addr); + break; + case sizeof (int32_t): + *ip = (int)*((int32_t *)addr); + break; + case sizeof (int64_t): + *ip = (int)*((int64_t *)addr); + break; + default: + return (dt_set_errno(dtp, EDT_DMISMATCH)); + } + + return (0); +} + +/*ARGSUSED*/ +static int +pfprint_average(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + const uint64_t *data = addr; + + if (size != sizeof (uint64_t) * 2) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + return (dt_printf(dtp, fp, format, + data[0] ? 
data[1] / normal / data[0] : 0)); +} + +/*ARGSUSED*/ +static int +pfprint_stddev(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + const uint64_t *data = addr; + + if (size != sizeof (uint64_t) * 4) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + return (dt_printf(dtp, fp, format, + dt_stddev((uint64_t *)data, normal))); +} + +/*ARGSUSED*/ +static int +pfprint_quantize(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + return (dt_print_quantize(dtp, fp, addr, size, normal)); +} + +/*ARGSUSED*/ +static int +pfprint_lquantize(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + return (dt_print_lquantize(dtp, fp, addr, size, normal)); +} + +/*ARGSUSED*/ +static int +pfprint_llquantize(dtrace_hdl_t *dtp, FILE *fp, const char *format, + const dt_pfargd_t *pfd, const void *addr, size_t size, uint64_t normal) +{ + return (dt_print_llquantize(dtp, fp, addr, size, normal)); +} + +static int +dt_printf_format(dtrace_hdl_t *dtp, FILE *fp, const dt_pfargv_t *pfv, + const dtrace_recdesc_t *recs, uint_t nrecs, const void *buf, + size_t len, const dtrace_aggdata_t **aggsdata, int naggvars) +{ + dt_pfargd_t *pfd = pfv->pfv_argv; + const dtrace_recdesc_t *recp = recs; + const dtrace_aggdata_t *aggdata; + dtrace_aggdesc_t *agg; + caddr_t lim = (caddr_t)buf + len, limit; + char format[64] = "%"; + int i, aggrec, curagg = -1; + uint64_t normal; + + /* + * If we are formatting an aggregation, set 'aggrec' to the index of + * the final record description (the aggregation result) so we can use + * this record index with any conversion where DT_PFCONV_AGG is set. + * (The actual aggregation used will vary as we increment through the + * aggregation variables that we have been passed.) 
Finally, we + * decrement nrecs to prevent this record from being used with any + * other conversion. + */ + if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { + assert(aggsdata != NULL); + assert(naggvars > 0); + + if (nrecs == 0) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + curagg = naggvars > 1 ? 1 : 0; + aggdata = aggsdata[0]; + aggrec = aggdata->dtada_desc->dtagd_nrecs - 1; + nrecs--; + } + + for (i = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { + const dt_pfconv_t *pfc = pfd->pfd_conv; + int width = pfd->pfd_width; + int prec = pfd->pfd_prec; + int rval; + + char *f = format + 1; /* skip initial '%' */ + const dtrace_recdesc_t *rec; + dt_pfprint_f *func; + caddr_t addr; + size_t size; + uint32_t flags; + + if (pfd->pfd_preflen != 0) { + char *tmp = alloca(pfd->pfd_preflen + 1); + + bcopy(pfd->pfd_prefix, tmp, pfd->pfd_preflen); + tmp[pfd->pfd_preflen] = '\0'; + + if ((rval = dt_printf(dtp, fp, tmp)) < 0) + return (rval); + + if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { + /* + * For printa(), we flush the buffer after each + * prefix, setting the flags to indicate that + * this is part of the printa() format string. + */ + flags = DTRACE_BUFDATA_AGGFORMAT; + + if (pfc == NULL && i == pfv->pfv_argc - 1) + flags |= DTRACE_BUFDATA_AGGLAST; + + if (dt_buffered_flush(dtp, NULL, NULL, + aggdata, flags) < 0) + return (-1); + } + } + + if (pfc == NULL) { + if (pfv->pfv_argc == 1) + return (nrecs != 0); + continue; + } + + /* + * If the conversion is %%, just invoke the print callback + * with no data record and continue; it consumes no record. 
+ */ + if (pfc->pfc_print == &pfprint_pct) { + if (pfc->pfc_print(dtp, fp, NULL, pfd, NULL, 0, 1) >= 0) + continue; + return (-1); /* errno is set for us */ + } + + if (pfd->pfd_flags & DT_PFCONV_DYNWIDTH) { + if (dt_printf_getint(dtp, recp++, nrecs--, buf, + len, &width) == -1) + return (-1); /* errno is set for us */ + pfd->pfd_dynwidth = width; + } else { + pfd->pfd_dynwidth = 0; + } + + if ((pfd->pfd_flags & DT_PFCONV_DYNPREC) && dt_printf_getint( + dtp, recp++, nrecs--, buf, len, &prec) == -1) + return (-1); /* errno is set for us */ + + if (pfd->pfd_flags & DT_PFCONV_AGG) { + /* + * This should be impossible -- the compiler shouldn't + * create a DT_PFCONV_AGG conversion without an + * aggregation present. Still, we'd rather fail + * gracefully than blow up... + */ + if (aggsdata == NULL) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + aggdata = aggsdata[curagg]; + agg = aggdata->dtada_desc; + + /* + * We increment the current aggregation variable, but + * not beyond the number of aggregation variables that + * we're printing. This has the (desired) effect that + * DT_PFCONV_AGG conversions beyond the number of + * aggregation variables (re-)convert the aggregation + * value of the last aggregation variable. + */ + if (curagg < naggvars - 1) + curagg++; + + rec = &agg->dtagd_rec[aggrec]; + addr = aggdata->dtada_data + rec->dtrd_offset; + limit = addr + aggdata->dtada_size; + normal = aggdata->dtada_normal; + flags = DTRACE_BUFDATA_AGGVAL; + } else { + if (nrecs == 0) + return (dt_set_errno(dtp, EDT_DMISMATCH)); + + if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { + /* + * When printing aggregation keys, we always + * set the aggdata to be the representative + * (zeroth) aggregation. The aggdata isn't + * actually used here in this case, but it is + * passed to the buffer handler and must + * therefore still be correct. 
+ */ + aggdata = aggsdata[0]; + flags = DTRACE_BUFDATA_AGGKEY; + } + + rec = recp++; + nrecs--; + addr = (caddr_t)buf + rec->dtrd_offset; + limit = lim; + normal = 1; + } + + size = rec->dtrd_size; + + if (addr + size > limit) { + dt_dprintf("bad size: addr=%p size=0x%x lim=%p\n", + (void *)addr, rec->dtrd_size, (void *)lim); + return (dt_set_errno(dtp, EDT_DOFFSET)); + } + + if (rec->dtrd_alignment != 0 && + ((uintptr_t)addr & (rec->dtrd_alignment - 1)) != 0) { + dt_dprintf("bad align: addr=%p size=0x%x align=0x%x\n", + (void *)addr, rec->dtrd_size, rec->dtrd_alignment); + return (dt_set_errno(dtp, EDT_DALIGN)); + } + + switch (rec->dtrd_action) { + case DTRACEAGG_AVG: + func = pfprint_average; + break; + case DTRACEAGG_STDDEV: + func = pfprint_stddev; + break; + case DTRACEAGG_QUANTIZE: + func = pfprint_quantize; + break; + case DTRACEAGG_LQUANTIZE: + func = pfprint_lquantize; + break; + case DTRACEAGG_LLQUANTIZE: + func = pfprint_llquantize; + break; + case DTRACEACT_MOD: + func = pfprint_mod; + break; + case DTRACEACT_UMOD: + func = pfprint_umod; + break; + default: + func = pfc->pfc_print; + break; + } + + if (pfd->pfd_flags & DT_PFCONV_ALT) + *f++ = '#'; + if (pfd->pfd_flags & DT_PFCONV_ZPAD) + *f++ = '0'; + if (width < 0 || (pfd->pfd_flags & DT_PFCONV_LEFT)) + *f++ = '-'; + if (pfd->pfd_flags & DT_PFCONV_SPOS) + *f++ = '+'; + if (pfd->pfd_flags & DT_PFCONV_GROUP) + *f++ = '\''; + if (pfd->pfd_flags & DT_PFCONV_SPACE) + *f++ = ' '; + + /* + * If we're printing a stack and DT_PFCONV_LEFT is set, we + * don't add the width to the format string. See the block + * comment in pfprint_stack() for a description of the + * behavior in this case. 
+ */ + if (func == pfprint_stack && (pfd->pfd_flags & DT_PFCONV_LEFT)) + width = 0; + + if (width != 0) + f += snprintf(f, sizeof (format), "%d", ABS(width)); + + if (prec > 0) + f += snprintf(f, sizeof (format), ".%d", prec); + + (void) strcpy(f, pfd->pfd_fmt); + pfd->pfd_rec = rec; + + if (func(dtp, fp, format, pfd, addr, size, normal) < 0) + return (-1); /* errno is set for us */ + + if (pfv->pfv_flags & DT_PRINTF_AGGREGATION) { + /* + * For printa(), we flush the buffer after each tuple + * element, indicating that this is the last record + * as appropriate. + */ + if (i == pfv->pfv_argc - 1) + flags |= DTRACE_BUFDATA_AGGLAST; + + if (dt_buffered_flush(dtp, NULL, + rec, aggdata, flags) < 0) + return (-1); + } + } + + return ((int)(recp - recs)); +} + +int +dtrace_sprintf(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, + const dtrace_recdesc_t *recp, uint_t nrecs, const void *buf, size_t len) +{ + dtrace_optval_t size; + int rval; + + rval = dtrace_getopt(dtp, "strsize", &size); + assert(rval == 0); + assert(dtp->dt_sprintf_buflen == 0); + + if (dtp->dt_sprintf_buf != NULL) + free(dtp->dt_sprintf_buf); + + if ((dtp->dt_sprintf_buf = malloc(size)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + bzero(dtp->dt_sprintf_buf, size); + dtp->dt_sprintf_buflen = size; + rval = dt_printf_format(dtp, fp, fmtdata, recp, nrecs, buf, len, + NULL, 0); + dtp->dt_sprintf_buflen = 0; + + if (rval == -1) + free(dtp->dt_sprintf_buf); + + return (rval); +} + +/*ARGSUSED*/ +int +dtrace_system(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, + const dtrace_probedata_t *data, const dtrace_recdesc_t *recp, + uint_t nrecs, const void *buf, size_t len) +{ + int rval = dtrace_sprintf(dtp, fp, fmtdata, recp, nrecs, buf, len); + + if (rval == -1) + return (rval); + + /* + * Before we execute the specified command, flush fp to assure that + * any prior dt_printf()'s appear before the output of the command + * not after it. 
+ */ + (void) fflush(fp); + + if (system(dtp->dt_sprintf_buf) == -1) + return (dt_set_errno(dtp, errno)); + + return (rval); +} + +int +dtrace_freopen(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, + const dtrace_probedata_t *data, const dtrace_recdesc_t *recp, + uint_t nrecs, const void *buf, size_t len) +{ + char selfbuf[40], restorebuf[40], *filename; + FILE *nfp; + int rval, errval; + dt_pfargv_t *pfv = fmtdata; + dt_pfargd_t *pfd = pfv->pfv_argv; + + rval = dtrace_sprintf(dtp, fp, fmtdata, recp, nrecs, buf, len); + + if (rval == -1 || fp == NULL) + return (rval); + +#if defined(sun) + if (pfd->pfd_preflen != 0 && + strcmp(pfd->pfd_prefix, DT_FREOPEN_RESTORE) == 0) { + /* + * The only way to have the format string set to the value + * DT_FREOPEN_RESTORE is via the empty freopen() string -- + * denoting that we should restore the old stdout. + */ + assert(strcmp(dtp->dt_sprintf_buf, DT_FREOPEN_RESTORE) == 0); + + if (dtp->dt_stdout_fd == -1) { + /* + * We could complain here by generating an error, + * but it seems like overkill: it seems that calling + * freopen() to restore stdout when freopen() has + * never before been called should just be a no-op, + * so we just return in this case. + */ + return (rval); + } + + (void) snprintf(restorebuf, sizeof (restorebuf), + "/dev/fd/%d", dtp->dt_stdout_fd); + filename = restorebuf; + } else { + filename = dtp->dt_sprintf_buf; + } + + /* + * freopen(3C) will always close the specified stream and underlying + * file descriptor -- even if the specified file can't be opened. + * Even for the semantic cesspool that is standard I/O, this is + * surprisingly brain-dead behavior: it means that any failure to + * open the specified file destroys the specified stream in the + * process -- which is particularly relevant when the specified stream + * happens (or rather, happened) to be stdout. 
This could be resolved + * were there an "fdreopen()" equivalent of freopen() that allowed one + * to pass a file descriptor instead of the name of a file, but there + * is no such thing. However, we can effect this ourselves by first + * fopen()'ing the desired file, and then (assuming that that works), + * freopen()'ing "/dev/fd/[fileno]", where [fileno] is the underlying + * file descriptor for the fopen()'d file. This way, if the fopen() + * fails, we can fail the operation without destroying stdout. + */ + if ((nfp = fopen(filename, "aF")) == NULL) { + char *msg = strerror(errno); + char *faultstr; + int len = 80; + + len += strlen(msg) + strlen(filename); + faultstr = alloca(len); + + (void) snprintf(faultstr, len, "couldn't freopen() \"%s\": %s", + filename, strerror(errno)); + + if ((errval = dt_handle_liberr(dtp, data, faultstr)) == 0) + return (rval); + + return (errval); + } + + (void) snprintf(selfbuf, sizeof (selfbuf), "/dev/fd/%d", fileno(nfp)); + + if (dtp->dt_stdout_fd == -1) { + /* + * If this is the first time that we're calling freopen(), + * we're going to stash away the file descriptor for stdout. + * We don't expect the dup(2) to fail, so if it does we must + * return failure. + */ + if ((dtp->dt_stdout_fd = dup(fileno(fp))) == -1) { + (void) fclose(nfp); + return (dt_set_errno(dtp, errno)); + } + } + + if (freopen(selfbuf, "aF", fp) == NULL) { + (void) fclose(nfp); + return (dt_set_errno(dtp, errno)); + } + + (void) fclose(nfp); +#else + /* + * The 'standard output' (which is not necessarily stdout) + * treatment on FreeBSD is implemented differently than on + * Solaris because FreeBSD's freopen() will attempt to re-use + * the current file descriptor, causing the previous file to + * be closed and thereby preventing it from being re-activated + * later. 
+ * + * For FreeBSD we use the concept of setting an output file + * pointer in the DTrace handle if a dtrace_freopen() has + * enabled another output file and we leave the caller's + * file pointer untouched. If it was actually stdout, then + * stdout remains open. If it was another file, then that + * file remains open. While a dtrace_freopen() has activated + * another file, we keep a pointer to that which we use in + * the output functions by preference and only use the caller's + * file pointer if no dtrace_freopen() call has been made. + * + * The check to see if we're re-activating the caller's + * output file is much the same as on Solaris. + */ + if (pfd->pfd_preflen != 0 && + strcmp(pfd->pfd_prefix, DT_FREOPEN_RESTORE) == 0) { + /* + * The only way to have the format string set to the value + * DT_FREOPEN_RESTORE is via the empty freopen() string -- + * denoting that we should restore the old stdout. + */ + assert(strcmp(dtp->dt_sprintf_buf, DT_FREOPEN_RESTORE) == 0); + + if (dtp->dt_freopen_fp == NULL) { + /* + * We could complain here by generating an error, + * but it seems like overkill: it seems that calling + * freopen() to restore stdout when freopen() has + * never before been called should just be a no-op, + * so we just return in this case. + */ + return (rval); + } + + /* + * At this point, to re-activate the original output file, + * on FreeBSD we only close the current file that this + * function opened previously. 
+ */ + (void) fclose(dtp->dt_freopen_fp); + dtp->dt_freopen_fp = NULL; + + return (rval); + } + + if ((nfp = fopen(dtp->dt_sprintf_buf, "a")) == NULL) { + char *msg = strerror(errno); + char *faultstr; + int len = 80; + + len += strlen(msg) + strlen(dtp->dt_sprintf_buf); + faultstr = alloca(len); + + (void) snprintf(faultstr, len, "couldn't freopen() \"%s\": %s", + dtp->dt_sprintf_buf, strerror(errno)); + + if ((errval = dt_handle_liberr(dtp, data, faultstr)) == 0) + return (rval); + + return (errval); + } + + if (dtp->dt_freopen_fp != NULL) + (void) fclose(dtp->dt_freopen_fp); + + /* Remember that the output has been redirected to the new file. */ + dtp->dt_freopen_fp = nfp; +#endif + + return (rval); +} + +/*ARGSUSED*/ +int +dtrace_fprintf(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, + const dtrace_probedata_t *data, const dtrace_recdesc_t *recp, + uint_t nrecs, const void *buf, size_t len) +{ + return (dt_printf_format(dtp, fp, fmtdata, + recp, nrecs, buf, len, NULL, 0)); +} + +void * +dtrace_printf_create(dtrace_hdl_t *dtp, const char *s) +{ + dt_pfargv_t *pfv = dt_printf_create(dtp, s); + dt_pfargd_t *pfd; + int i; + + if (pfv == NULL) + return (NULL); /* errno has been set for us */ + + pfd = pfv->pfv_argv; + + for (i = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { + const dt_pfconv_t *pfc = pfd->pfd_conv; + + if (pfc == NULL) + continue; + + /* + * If the output format is not %s then we assume that we have + * been given a correctly-sized format string, so we copy the + * true format name including the size modifier. If the output + * format is %s, then either the input format is %s as well or + * it is one of our custom formats (e.g. pfprint_addr), so we + * must set pfd_fmt to be the output format conversion "s". 
+ */ + if (strcmp(pfc->pfc_ofmt, "s") != 0) + (void) strcat(pfd->pfd_fmt, pfc->pfc_name); + else + (void) strcat(pfd->pfd_fmt, pfc->pfc_ofmt); + } + + return (pfv); +} + +void * +dtrace_printa_create(dtrace_hdl_t *dtp, const char *s) +{ + dt_pfargv_t *pfv = dtrace_printf_create(dtp, s); + + if (pfv == NULL) + return (NULL); /* errno has been set for us */ + + pfv->pfv_flags |= DT_PRINTF_AGGREGATION; + + return (pfv); +} + +/*ARGSUSED*/ +size_t +dtrace_printf_format(dtrace_hdl_t *dtp, void *fmtdata, char *s, size_t len) +{ + dt_pfargv_t *pfv = fmtdata; + dt_pfargd_t *pfd = pfv->pfv_argv; + + /* + * An upper bound on the string length is the length of the original + * format string, plus three times the number of conversions (each + * conversion could add up an additional "ll" and/or pfd_width digit + * in the case of converting %? to %16) plus one for a terminating \0. + */ + size_t formatlen = strlen(pfv->pfv_format) + 3 * pfv->pfv_argc + 1; + char *format = alloca(formatlen); + char *f = format; + int i, j; + + for (i = 0; i < pfv->pfv_argc; i++, pfd = pfd->pfd_next) { + const dt_pfconv_t *pfc = pfd->pfd_conv; + const char *str; + int width = pfd->pfd_width; + int prec = pfd->pfd_prec; + + if (pfd->pfd_preflen != 0) { + for (j = 0; j < pfd->pfd_preflen; j++) + *f++ = pfd->pfd_prefix[j]; + } + + if (pfc == NULL) + continue; + + *f++ = '%'; + + if (pfd->pfd_flags & DT_PFCONV_ALT) + *f++ = '#'; + if (pfd->pfd_flags & DT_PFCONV_ZPAD) + *f++ = '0'; + if (pfd->pfd_flags & DT_PFCONV_LEFT) + *f++ = '-'; + if (pfd->pfd_flags & DT_PFCONV_SPOS) + *f++ = '+'; + if (pfd->pfd_flags & DT_PFCONV_DYNWIDTH) + *f++ = '*'; + if (pfd->pfd_flags & DT_PFCONV_DYNPREC) { + *f++ = '.'; + *f++ = '*'; + } + if (pfd->pfd_flags & DT_PFCONV_GROUP) + *f++ = '\''; + if (pfd->pfd_flags & DT_PFCONV_SPACE) + *f++ = ' '; + if (pfd->pfd_flags & DT_PFCONV_AGG) + *f++ = '@'; + + if (width != 0) + f += snprintf(f, sizeof (format), "%d", width); + + if (prec != 0) + f += snprintf(f, sizeof (format), 
".%d", prec); + + /* + * If the output format is %s, then either %s is the underlying + * conversion or the conversion is one of our customized ones, + * e.g. pfprint_addr. In these cases, put the original string + * name of the conversion (pfc_name) into the pickled format + * string rather than the derived conversion (pfd_fmt). + */ + if (strcmp(pfc->pfc_ofmt, "s") == 0) + str = pfc->pfc_name; + else + str = pfd->pfd_fmt; + + for (j = 0; str[j] != '\0'; j++) + *f++ = str[j]; + } + + *f = '\0'; /* insert nul byte; do not count in return value */ + + assert(f < format + formatlen); + (void) strncpy(s, format, len); + + return ((size_t)(f - format)); +} + +static int +dt_fprinta(const dtrace_aggdata_t *adp, void *arg) +{ + const dtrace_aggdesc_t *agg = adp->dtada_desc; + const dtrace_recdesc_t *recp = &agg->dtagd_rec[0]; + uint_t nrecs = agg->dtagd_nrecs; + dt_pfwalk_t *pfw = arg; + dtrace_hdl_t *dtp = pfw->pfw_argv->pfv_dtp; + int id; + + if (dt_printf_getint(dtp, recp++, nrecs--, + adp->dtada_data, adp->dtada_size, &id) != 0 || pfw->pfw_aid != id) + return (0); /* no aggregation id or id does not match */ + + if (dt_printf_format(dtp, pfw->pfw_fp, pfw->pfw_argv, + recp, nrecs, adp->dtada_data, adp->dtada_size, &adp, 1) == -1) + return (pfw->pfw_err = dtp->dt_errno); + + /* + * Cast away the const to set the bit indicating that this aggregation + * has been printed. 
+ */ + ((dtrace_aggdesc_t *)agg)->dtagd_flags |= DTRACE_AGD_PRINTED; + + return (0); +} + +static int +dt_fprintas(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg) +{ + const dtrace_aggdata_t *aggdata = aggsdata[0]; + const dtrace_aggdesc_t *agg = aggdata->dtada_desc; + const dtrace_recdesc_t *rec = &agg->dtagd_rec[1]; + uint_t nrecs = agg->dtagd_nrecs - 1; + dt_pfwalk_t *pfw = arg; + dtrace_hdl_t *dtp = pfw->pfw_argv->pfv_dtp; + int i; + + if (dt_printf_format(dtp, pfw->pfw_fp, pfw->pfw_argv, + rec, nrecs, aggdata->dtada_data, aggdata->dtada_size, + aggsdata, naggvars) == -1) + return (pfw->pfw_err = dtp->dt_errno); + + /* + * For each aggregation, indicate that it has been printed, casting + * away the const as necessary. + */ + for (i = 1; i < naggvars; i++) { + agg = aggsdata[i]->dtada_desc; + ((dtrace_aggdesc_t *)agg)->dtagd_flags |= DTRACE_AGD_PRINTED; + } + + return (0); +} +/*ARGSUSED*/ +int +dtrace_fprinta(dtrace_hdl_t *dtp, FILE *fp, void *fmtdata, + const dtrace_probedata_t *data, const dtrace_recdesc_t *recs, + uint_t nrecs, const void *buf, size_t len) +{ + dt_pfwalk_t pfw; + int i, naggvars = 0; + dtrace_aggvarid_t *aggvars; + + aggvars = alloca(nrecs * sizeof (dtrace_aggvarid_t)); + + /* + * This might be a printa() with multiple aggregation variables. We + * need to scan forward through the records until we find a record from + * a different statement. 
+ */ + for (i = 0; i < nrecs; i++) { + const dtrace_recdesc_t *nrec = &recs[i]; + + if (nrec->dtrd_uarg != recs->dtrd_uarg) + break; + + if (nrec->dtrd_action != recs->dtrd_action) + return (dt_set_errno(dtp, EDT_BADAGG)); + + aggvars[naggvars++] = + /* LINTED - alignment */ + *((dtrace_aggvarid_t *)((caddr_t)buf + nrec->dtrd_offset)); + } + + if (naggvars == 0) + return (dt_set_errno(dtp, EDT_BADAGG)); + + pfw.pfw_argv = fmtdata; + pfw.pfw_fp = fp; + pfw.pfw_err = 0; + + if (naggvars == 1) { + pfw.pfw_aid = aggvars[0]; + + if (dtrace_aggregate_walk_sorted(dtp, + dt_fprinta, &pfw) == -1 || pfw.pfw_err != 0) + return (-1); /* errno is set for us */ + } else { + if (dtrace_aggregate_walk_joined(dtp, aggvars, naggvars, + dt_fprintas, &pfw) == -1 || pfw.pfw_err != 0) + return (-1); /* errno is set for us */ + } + + return (i); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.h new file mode 100644 index 0000000..b3b5b8b --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_printf.h @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DT_PRINTF_H +#define _DT_PRINTF_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <libctf.h> +#include <dtrace.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dt_node; +struct dt_ident; + +struct dt_pfconv; +struct dt_pfargv; +struct dt_pfargd; + +typedef int dt_pfcheck_f(struct dt_pfargv *, + struct dt_pfargd *, struct dt_node *); +typedef int dt_pfprint_f(dtrace_hdl_t *, FILE *, const char *, + const struct dt_pfargd *, const void *, size_t, uint64_t); + +typedef struct dt_pfconv { + const char *pfc_name; /* string name of input conversion */ + const char *pfc_ofmt; /* string name of output conversion */ + const char *pfc_tstr; /* string name for conversion type */ + dt_pfcheck_f *pfc_check; /* function to use for type checking */ + dt_pfprint_f *pfc_print; /* function to use for formatting */ + ctf_file_t *pfc_cctfp; /* CTF container for "C" defn of type */ + ctf_id_t pfc_ctype; /* CTF type ID for "C" defn of type */ + ctf_file_t *pfc_dctfp; /* CTF container for "D" defn of type */ + ctf_id_t pfc_dtype; /* CTF type ID for "D" defn of type */ + struct dt_pfconv *pfc_next; /* next conversion in hash chain */ +} dt_pfconv_t; + +typedef struct dt_pfdict { + dt_pfconv_t **pdi_buckets; /* hash bucket array */ + uint_t pdi_nbuckets; /* size of hash bucket array */ +} dt_pfdict_t; + +typedef struct dt_pfargd { + const char *pfd_prefix; /* prefix string pointer (or NULL) */ + size_t pfd_preflen; /* length of prefix in bytes */ + char pfd_fmt[8]; /* output format name to use */ + uint_t pfd_flags; /* format flags (see below) */ + int pfd_width; /* field width (or 
0) */ + int pfd_dynwidth; /* dynamic field width (or 0) */ + int pfd_prec; /* field precision (or 0) */ + const dt_pfconv_t *pfd_conv; /* conversion specification */ + const dtrace_recdesc_t *pfd_rec; /* pointer to current record */ + struct dt_pfargd *pfd_next; /* pointer to next arg descriptor */ +} dt_pfargd_t; + +#define DT_PFCONV_ALT 0x0001 /* alternate print format (%#) */ +#define DT_PFCONV_ZPAD 0x0002 /* zero-pad integer field (%0) */ +#define DT_PFCONV_LEFT 0x0004 /* left-align field (%-) */ +#define DT_PFCONV_SPOS 0x0008 /* sign positive values (%+) */ +#define DT_PFCONV_DYNWIDTH 0x0010 /* dynamic width (%*.) */ +#define DT_PFCONV_DYNPREC 0x0020 /* dynamic precision (%.*) */ +#define DT_PFCONV_GROUP 0x0040 /* group thousands (%') */ +#define DT_PFCONV_SPACE 0x0080 /* insert leading space (% ) */ +#define DT_PFCONV_AGG 0x0100 /* use aggregation result (%@) */ +#define DT_PFCONV_SIGNED 0x0200 /* arg is a signed integer */ + +typedef struct dt_pfargv { + dtrace_hdl_t *pfv_dtp; /* libdtrace client handle */ + char *pfv_format; /* format string pointer */ + dt_pfargd_t *pfv_argv; /* list of argument descriptors */ + uint_t pfv_argc; /* number of argument descriptors */ + uint_t pfv_flags; /* flags used for validation */ +} dt_pfargv_t; + +typedef struct dt_pfwalk { + const dt_pfargv_t *pfw_argv; /* argument description list */ + uint_t pfw_aid; /* aggregation variable identifier */ + FILE *pfw_fp; /* file pointer to use for output */ + int pfw_err; /* error status code */ +} dt_pfwalk_t; + +extern int dt_pfdict_create(dtrace_hdl_t *); +extern void dt_pfdict_destroy(dtrace_hdl_t *); + +extern dt_pfargv_t *dt_printf_create(dtrace_hdl_t *, const char *); +extern void dt_printf_destroy(dt_pfargv_t *); + +#define DT_PRINTF_EXACTLEN 0x1 /* do not permit extra arguments */ +#define DT_PRINTF_AGGREGATION 0x2 /* enable aggregation conversion */ + +extern void dt_printf_validate(dt_pfargv_t *, uint_t, + struct dt_ident *, int, dtrace_actkind_t, struct dt_node *); + 
+extern void dt_printa_validate(struct dt_node *, struct dt_node *); + +extern int dt_print_stack(dtrace_hdl_t *, FILE *, + const char *, caddr_t, int, int); +extern int dt_print_ustack(dtrace_hdl_t *, FILE *, + const char *, caddr_t, uint64_t); +extern int dt_print_mod(dtrace_hdl_t *, FILE *, const char *, caddr_t); +extern int dt_print_umod(dtrace_hdl_t *, FILE *, const char *, caddr_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_PRINTF_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c new file mode 100644 index 0000000..d40a0ae --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c @@ -0,0 +1,1209 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * DTrace Process Control + * + * This file provides a set of routines that permit libdtrace and its clients + * to create and grab process handles using libproc, and to share these handles + * between library mechanisms that need libproc access, such as ustack(), and + * client mechanisms that need libproc access, such as dtrace(1M) -c and -p. + * The library provides several mechanisms in the libproc control layer: + * + * Reference Counting: The library code and client code can independently grab + * the same process handles without interfering with one another. Only when + * the reference count drops to zero and the handle is not being cached (see + * below for more information on caching) will Prelease() be called on it. + * + * Handle Caching: If a handle is grabbed PGRAB_RDONLY (e.g. by ustack()) and + * the reference count drops to zero, the handle is not immediately released. + * Instead, libproc handles are maintained on dph_lrulist in order from most- + * recently accessed to least-recently accessed. Idle handles are maintained + * until a pre-defined LRU cache limit is exceeded, permitting repeated calls + * to ustack() to avoid the overhead of releasing and re-grabbing processes. + * + * Process Control: For processes that are grabbed for control (~PGRAB_RDONLY) + * or created by dt_proc_create(), a control thread is created to provide + * callbacks on process exit and symbol table caching on dlopen()s. + * + * MT-Safety: Libproc is not MT-Safe, so dt_proc_lock() and dt_proc_unlock() + * are provided to synchronize access to the libproc handle between libdtrace + * code and client code and the control thread's use of the ps_prochandle. + * + * NOTE: MT-Safety is NOT provided for libdtrace itself, or for use of the + * dtrace_proc_grab/dtrace_proc_create mechanisms. Like all exported libdtrace + * calls, these are assumed to be MT-Unsafe. 
MT-Safety is ONLY provided for + * synchronization between libdtrace control threads and the client thread. + * + * The ps_prochandles themselves are maintained along with a dt_proc_t struct + * in a hash table indexed by PID. This provides basic locking and reference + * counting. The dt_proc_t is also maintained in LRU order on dph_lrulist. + * The dph_lrucnt and dph_lrulim count the number of cacheable processes and + * the current limit on the number of actively cached entries. + * + * The control thread for a process establishes breakpoints at the rtld_db + * locations of interest, updates mappings and symbol tables at these points, + * and handles exec and fork (by always following the parent). The control + * thread automatically exits when the process dies or control is lost. + * + * A simple notification mechanism is provided for libdtrace clients using + * dtrace_handle_proc() for notification of PS_UNDEAD or PS_LOST events. If + * such an event occurs, the dt_proc_t itself is enqueued on a notification + * list and the control thread broadcasts to dph_cv. dtrace_sleep() will wake + * up using this condition and will then call the client handler as necessary. 
+ */ + +#include <sys/wait.h> +#if defined(sun) +#include <sys/lwp.h> +#endif +#include <strings.h> +#include <signal.h> +#include <assert.h> +#include <errno.h> + +#include <dt_proc.h> +#include <dt_pid.h> +#include <dt_impl.h> + +#if !defined(sun) +#include <sys/syscall.h> +#include <libproc_compat.h> +#define SYS_forksys SYS_fork +#endif + +#define IS_SYS_EXEC(w) (w == SYS_execve) +#define IS_SYS_FORK(w) (w == SYS_vfork || w == SYS_forksys) + +static dt_bkpt_t * +dt_proc_bpcreate(dt_proc_t *dpr, uintptr_t addr, dt_bkpt_f *func, void *data) +{ + struct ps_prochandle *P = dpr->dpr_proc; + dt_bkpt_t *dbp; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + + if ((dbp = dt_zalloc(dpr->dpr_hdl, sizeof (dt_bkpt_t))) != NULL) { + dbp->dbp_func = func; + dbp->dbp_data = data; + dbp->dbp_addr = addr; + + if (Psetbkpt(P, dbp->dbp_addr, &dbp->dbp_instr) == 0) + dbp->dbp_active = B_TRUE; + + dt_list_append(&dpr->dpr_bps, dbp); + } + + return (dbp); +} + +static void +dt_proc_bpdestroy(dt_proc_t *dpr, int delbkpts) +{ + int state = Pstate(dpr->dpr_proc); + dt_bkpt_t *dbp, *nbp; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + + for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = nbp) { + if (delbkpts && dbp->dbp_active && + state != PS_LOST && state != PS_UNDEAD) { + (void) Pdelbkpt(dpr->dpr_proc, + dbp->dbp_addr, dbp->dbp_instr); + } + nbp = dt_list_next(dbp); + dt_list_delete(&dpr->dpr_bps, dbp); + dt_free(dpr->dpr_hdl, dbp); + } +} + +static void +dt_proc_bpmatch(dtrace_hdl_t *dtp, dt_proc_t *dpr) +{ +#if defined(sun) + const lwpstatus_t *psp = &Pstatus(dpr->dpr_proc)->pr_lwp; +#else + unsigned long pc; +#endif + dt_bkpt_t *dbp; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + +#if !defined(sun) + proc_regget(dpr->dpr_proc, REG_PC, &pc); + proc_bkptregadj(&pc); +#endif + + for (dbp = dt_list_next(&dpr->dpr_bps); + dbp != NULL; dbp = dt_list_next(dbp)) { +#if defined(sun) + if (psp->pr_reg[R_PC] == dbp->dbp_addr) + break; +#else + if (pc == dbp->dbp_addr) + break; +#endif + } + + 
if (dbp == NULL) { + dt_dprintf("pid %d: spurious breakpoint wakeup for %lx\n", +#if defined(sun) + (int)dpr->dpr_pid, (ulong_t)psp->pr_reg[R_PC]); +#else + (int)dpr->dpr_pid, pc); +#endif + return; + } + + dt_dprintf("pid %d: hit breakpoint at %lx (%lu)\n", + (int)dpr->dpr_pid, (ulong_t)dbp->dbp_addr, ++dbp->dbp_hits); + + dbp->dbp_func(dtp, dpr, dbp->dbp_data); + (void) Pxecbkpt(dpr->dpr_proc, dbp->dbp_instr); +} + +static void +dt_proc_bpenable(dt_proc_t *dpr) +{ + dt_bkpt_t *dbp; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + + for (dbp = dt_list_next(&dpr->dpr_bps); + dbp != NULL; dbp = dt_list_next(dbp)) { + if (!dbp->dbp_active && Psetbkpt(dpr->dpr_proc, + dbp->dbp_addr, &dbp->dbp_instr) == 0) + dbp->dbp_active = B_TRUE; + } + + dt_dprintf("breakpoints enabled\n"); +} + +static void +dt_proc_bpdisable(dt_proc_t *dpr) +{ + dt_bkpt_t *dbp; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + + for (dbp = dt_list_next(&dpr->dpr_bps); + dbp != NULL; dbp = dt_list_next(dbp)) { + if (dbp->dbp_active && Pdelbkpt(dpr->dpr_proc, + dbp->dbp_addr, dbp->dbp_instr) == 0) + dbp->dbp_active = B_FALSE; + } + + dt_dprintf("breakpoints disabled\n"); +} + +static void +dt_proc_notify(dtrace_hdl_t *dtp, dt_proc_hash_t *dph, dt_proc_t *dpr, + const char *msg) +{ + dt_proc_notify_t *dprn = dt_alloc(dtp, sizeof (dt_proc_notify_t)); + + if (dprn == NULL) { + dt_dprintf("failed to allocate notification for %d %s\n", + (int)dpr->dpr_pid, msg); + } else { + dprn->dprn_dpr = dpr; + if (msg == NULL) + dprn->dprn_errmsg[0] = '\0'; + else + (void) strlcpy(dprn->dprn_errmsg, msg, + sizeof (dprn->dprn_errmsg)); + + (void) pthread_mutex_lock(&dph->dph_lock); + + dprn->dprn_next = dph->dph_notify; + dph->dph_notify = dprn; + + (void) pthread_cond_broadcast(&dph->dph_cv); + (void) pthread_mutex_unlock(&dph->dph_lock); + } +} + +/* + * Check to see if the control thread was requested to stop when the victim + * process reached a particular event (why) rather than continuing the victim. 
+ * If 'why' is set in the stop mask, we wait on dpr_cv for dt_proc_continue(). + * If 'why' is not set, this function returns immediately and does nothing. + */ +static void +dt_proc_stop(dt_proc_t *dpr, uint8_t why) +{ + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + assert(why != DT_PROC_STOP_IDLE); + + if (dpr->dpr_stop & why) { + dpr->dpr_stop |= DT_PROC_STOP_IDLE; + dpr->dpr_stop &= ~why; + + (void) pthread_cond_broadcast(&dpr->dpr_cv); + + /* + * We disable breakpoints while stopped to preserve the + * integrity of the program text for both our own disassembly + * and that of the kernel. + */ + dt_proc_bpdisable(dpr); + + while (dpr->dpr_stop & DT_PROC_STOP_IDLE) + (void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock); + + dt_proc_bpenable(dpr); + } +} + +/*ARGSUSED*/ +static void +dt_proc_bpmain(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *fname) +{ + dt_dprintf("pid %d: breakpoint at %s()\n", (int)dpr->dpr_pid, fname); + dt_proc_stop(dpr, DT_PROC_STOP_MAIN); +} + +static void +dt_proc_rdevent(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *evname) +{ + rd_event_msg_t rdm; + rd_err_e err; + + if ((err = rd_event_getmsg(dpr->dpr_rtld, &rdm)) != RD_OK) { + dt_dprintf("pid %d: failed to get %s event message: %s\n", + (int)dpr->dpr_pid, evname, rd_errstr(err)); + return; + } + + dt_dprintf("pid %d: rtld event %s type=%d state %d\n", + (int)dpr->dpr_pid, evname, rdm.type, rdm.u.state); + + switch (rdm.type) { + case RD_DLACTIVITY: + if (rdm.u.state != RD_CONSISTENT) + break; + + Pupdate_syms(dpr->dpr_proc); + if (dt_pid_create_probes_module(dtp, dpr) != 0) + dt_proc_notify(dtp, dtp->dt_procs, dpr, + dpr->dpr_errmsg); + + break; + case RD_PREINIT: + Pupdate_syms(dpr->dpr_proc); + dt_proc_stop(dpr, DT_PROC_STOP_PREINIT); + break; + case RD_POSTINIT: + Pupdate_syms(dpr->dpr_proc); + dt_proc_stop(dpr, DT_PROC_STOP_POSTINIT); + break; + } +} + +static void +dt_proc_rdwatch(dt_proc_t *dpr, rd_event_e event, const char *evname) +{ + rd_notify_t rdn; + rd_err_e err; + + if 
((err = rd_event_addr(dpr->dpr_rtld, event, &rdn)) != RD_OK) { + dt_dprintf("pid %d: failed to get event address for %s: %s\n", + (int)dpr->dpr_pid, evname, rd_errstr(err)); + return; + } + + if (rdn.type != RD_NOTIFY_BPT) { + dt_dprintf("pid %d: event %s has unexpected type %d\n", + (int)dpr->dpr_pid, evname, rdn.type); + return; + } + + (void) dt_proc_bpcreate(dpr, rdn.u.bptaddr, +#if defined(sun) + (dt_bkpt_f *)dt_proc_rdevent, (void *)evname); +#else + /* XXX ugly */ + (dt_bkpt_f *)dt_proc_rdevent, __DECONST(void *, evname)); +#endif +} + +/* + * Common code for enabling events associated with the run-time linker after + * attaching to a process or after a victim process completes an exec(2). + */ +static void +dt_proc_attach(dt_proc_t *dpr, int exec) +{ +#if defined(sun) + const pstatus_t *psp = Pstatus(dpr->dpr_proc); +#endif + rd_err_e err; + GElf_Sym sym; + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + + if (exec) { +#if defined(sun) + if (psp->pr_lwp.pr_errno != 0) + return; /* exec failed: nothing needs to be done */ +#endif + + dt_proc_bpdestroy(dpr, B_FALSE); +#if defined(sun) + Preset_maps(dpr->dpr_proc); +#endif + } + if ((dpr->dpr_rtld = Prd_agent(dpr->dpr_proc)) != NULL && + (err = rd_event_enable(dpr->dpr_rtld, B_TRUE)) == RD_OK) { +#if defined(sun) + dt_proc_rdwatch(dpr, RD_PREINIT, "RD_PREINIT"); +#endif + dt_proc_rdwatch(dpr, RD_POSTINIT, "RD_POSTINIT"); +#if defined(sun) + dt_proc_rdwatch(dpr, RD_DLACTIVITY, "RD_DLACTIVITY"); +#endif + } else { + dt_dprintf("pid %d: failed to enable rtld events: %s\n", + (int)dpr->dpr_pid, dpr->dpr_rtld ? 
rd_errstr(err) : + "rtld_db agent initialization failed"); + } + + Pupdate_maps(dpr->dpr_proc); + + if (Pxlookup_by_name(dpr->dpr_proc, LM_ID_BASE, + "a.out", "main", &sym, NULL) == 0) { + (void) dt_proc_bpcreate(dpr, (uintptr_t)sym.st_value, + (dt_bkpt_f *)dt_proc_bpmain, "a.out`main"); + } else { + dt_dprintf("pid %d: failed to find a.out`main: %s\n", + (int)dpr->dpr_pid, strerror(errno)); + } +} + +/* + * Wait for a stopped process to be set running again by some other debugger. + * This is typically not required by /proc-based debuggers, since the usual + * model is that one debugger controls one victim. But DTrace, as usual, has + * its own needs: the stop() action assumes that prun(1) or some other tool + * will be applied to resume the victim process. This could be solved by + * adding a PCWRUN directive to /proc, but that seems like overkill unless + * other debuggers end up needing this functionality, so we implement a cheap + * equivalent to PCWRUN using the set of existing kernel mechanisms. + * + * Our intent is really not just to wait for the victim to run, but rather to + * wait for it to run and then stop again for a reason other than the current + * PR_REQUESTED stop. Since PCWSTOP/Pstopstatus() can be applied repeatedly + * to a stopped process and will return the same result without affecting the + * victim, we can just perform these operations repeatedly until Pstate() + * changes, the representative LWP ID changes, or the stop timestamp advances. + * dt_proc_control() will then rediscover the new state and continue as usual. + * When the process is still stopped in the same exact state, we sleep for a + * brief interval before waiting again so as not to spin consuming CPU cycles. 
+ */ +static void +dt_proc_waitrun(dt_proc_t *dpr) +{ +#ifndef DOODAD + /* + * The /proc-based wait in the #else branch is only built when DOODAD + * is defined. Otherwise this is a stub that returns immediately + * without touching dpr_lock; record that through dt_dprintf(), as the + * rest of this file does for diagnostics, instead of an unconditional + * printf() to stdout from library code. + */ + dt_dprintf("pid %d: dt_proc_waitrun() is not implemented\n", + (int)dpr->dpr_pid); +#else + struct ps_prochandle *P = dpr->dpr_proc; + const lwpstatus_t *psp = &Pstatus(P)->pr_lwp; + + int krflag = psp->pr_flags & (PR_KLC | PR_RLC); + timestruc_t tstamp = psp->pr_tstamp; + lwpid_t lwpid = psp->pr_lwpid; + + const long wstop = PCWSTOP; + int pfd = Pctlfd(P); + + assert(DT_MUTEX_HELD(&dpr->dpr_lock)); + assert(psp->pr_flags & PR_STOPPED); + assert(Pstate(P) == PS_STOP); + + /* + * While we are waiting for the victim to run, clear PR_KLC and PR_RLC + * so that if the libdtrace client is killed, the victim stays stopped. + * dt_proc_destroy() will also observe this and perform PRELEASE_HANG. + */ + (void) Punsetflags(P, krflag); + Psync(P); + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + + while (!dpr->dpr_quit) { + if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR) + continue; /* check dpr_quit and continue waiting */ + + (void) pthread_mutex_lock(&dpr->dpr_lock); + (void) Pstopstatus(P, PCNULL, 0); + psp = &Pstatus(P)->pr_lwp; + + /* + * If we've reached a new state, found a new representative, or + * the stop timestamp has changed, restore PR_KLC/PR_RLC to its + * original setting and then return with dpr_lock held. + */ + if (Pstate(P) != PS_STOP || psp->pr_lwpid != lwpid || + bcmp(&psp->pr_tstamp, &tstamp, sizeof (tstamp)) != 0) { + (void) Psetflags(P, krflag); + Psync(P); + return; + } + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + (void) poll(NULL, 0, MILLISEC / 2); + } + + (void) pthread_mutex_lock(&dpr->dpr_lock); +#endif +} + +/* + * Argument bundle handed to the dt_proc_control() thread entry point. + */ +typedef struct dt_proc_control_data { + dtrace_hdl_t *dpcd_hdl; /* DTrace handle */ + dt_proc_t *dpcd_proc; /* process to control */ +} dt_proc_control_data_t; + +/* + * Main loop for all victim process control threads. We initialize all the + * appropriate /proc control mechanisms, and then enter a loop waiting for + * the process to stop on an event or die. 
We process any events by calling + * appropriate subroutines, and exit when the victim dies or we lose control. + * + * The control thread synchronizes the use of dpr_proc with other libdtrace + * threads using dpr_lock. We hold the lock for all of our operations except + * waiting while the process is running: this is accomplished by writing a + * PCWSTOP directive directly to the underlying /proc/<pid>/ctl file. If the + * libdtrace client wishes to exit or abort our wait, SIGCANCEL can be used. + */ +static void * +dt_proc_control(void *arg) +{ + dt_proc_control_data_t *datap = arg; + dtrace_hdl_t *dtp = datap->dpcd_hdl; + dt_proc_t *dpr = datap->dpcd_proc; + dt_proc_hash_t *dph = dpr->dpr_hdl->dt_procs; + struct ps_prochandle *P = dpr->dpr_proc; + int pid = dpr->dpr_pid; + +#if defined(sun) + int pfd = Pctlfd(P); + + const long wstop = PCWSTOP; +#endif + int notify = B_FALSE; + + /* + * We disable the POSIX thread cancellation mechanism so that the + * client program using libdtrace can't accidentally cancel our thread. + * dt_proc_destroy() uses SIGCANCEL explicitly to simply poke us out + * of PCWSTOP with EINTR, at which point we will see dpr_quit and exit. + */ + (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + + /* + * Set up the corresponding process for tracing by libdtrace. We want + * to be able to catch breakpoints and efficiently single-step over + * them, and we need to enable librtld_db to watch libdl activity. 
+ */ + (void) pthread_mutex_lock(&dpr->dpr_lock); + +#if defined(sun) + (void) Punsetflags(P, PR_ASYNC); /* require synchronous mode */ + (void) Psetflags(P, PR_BPTADJ); /* always adjust eip on x86 */ + (void) Punsetflags(P, PR_FORK); /* do not inherit on fork */ + + (void) Pfault(P, FLTBPT, B_TRUE); /* always trace breakpoints */ + (void) Pfault(P, FLTTRACE, B_TRUE); /* always trace single-step */ + + /* + * We must trace exit from exec() system calls so that if the exec is + * successful, we can reset our breakpoints and re-initialize libproc. + */ + (void) Psysexit(P, SYS_execve, B_TRUE); + + /* + * We must trace entry and exit for fork() system calls in order to + * disable our breakpoints temporarily during the fork. We do not set + * the PR_FORK flag, so if fork succeeds the child begins executing and + * does not inherit any other tracing behaviors or a control thread. + */ + (void) Psysentry(P, SYS_vfork, B_TRUE); + (void) Psysexit(P, SYS_vfork, B_TRUE); + (void) Psysentry(P, SYS_forksys, B_TRUE); + (void) Psysexit(P, SYS_forksys, B_TRUE); + + Psync(P); /* enable all /proc changes */ +#endif + dt_proc_attach(dpr, B_FALSE); /* enable rtld breakpoints */ + + /* + * If PR_KLC is set, we created the process; otherwise we grabbed it. + * Check for an appropriate stop request and wait for dt_proc_continue. + */ +#if defined(sun) + if (Pstatus(P)->pr_flags & PR_KLC) +#else + if (proc_getflags(P) & PR_KLC) +#endif + dt_proc_stop(dpr, DT_PROC_STOP_CREATE); + else + dt_proc_stop(dpr, DT_PROC_STOP_GRAB); + + if (Psetrun(P, 0, 0) == -1) { + dt_dprintf("pid %d: failed to set running: %s\n", + (int)dpr->dpr_pid, strerror(errno)); + } + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + + /* + * Wait for the process corresponding to this control thread to stop, + * process the event, and then set it running again. We want to sleep + * with dpr_lock *unheld* so that other parts of libdtrace can use the + * ps_prochandle in the meantime (e.g. ustack()). 
To do this, we write + * a PCWSTOP directive directly to the underlying /proc/<pid>/ctl file. + * Once the process stops, we wake up, grab dpr_lock, and then call + * Pwait() (which will return immediately) and do our processing. + */ + while (!dpr->dpr_quit) { + const lwpstatus_t *psp; + +#if defined(sun) + if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR) + continue; /* check dpr_quit and continue waiting */ +#else + /* Wait for the process to report status. */ + proc_wstatus(P); + if (errno == EINTR) + continue; /* check dpr_quit and continue waiting */ +#endif + + (void) pthread_mutex_lock(&dpr->dpr_lock); + +#if defined(sun) +pwait_locked: + if (Pstopstatus(P, PCNULL, 0) == -1 && errno == EINTR) { + (void) pthread_mutex_unlock(&dpr->dpr_lock); + continue; /* check dpr_quit and continue waiting */ + } +#endif + + switch (Pstate(P)) { + case PS_STOP: +#if defined(sun) + psp = &Pstatus(P)->pr_lwp; +#else + psp = proc_getlwpstatus(P); +#endif + + dt_dprintf("pid %d: proc stopped showing %d/%d\n", + pid, psp->pr_why, psp->pr_what); + + /* + * If the process stops showing PR_REQUESTED, then the + * DTrace stop() action was applied to it or another + * debugging utility (e.g. pstop(1)) asked it to stop. + * In either case, the user's intention is for the + * process to remain stopped until another external + * mechanism (e.g. prun(1)) is applied. So instead of + * setting the process running ourself, we wait for + * someone else to do so. Once that happens, we return + * to our normal loop waiting for an event of interest. + */ + if (psp->pr_why == PR_REQUESTED) { + dt_proc_waitrun(dpr); + (void) pthread_mutex_unlock(&dpr->dpr_lock); + continue; + } + + /* + * If the process stops showing one of the events that + * we are tracing, perform the appropriate response. 
+ * Note that we ignore PR_SUSPENDED, PR_CHECKPOINT, and + * PR_JOBCONTROL by design: if one of these conditions + * occurs, we will fall through to Psetrun() but the + * process will remain stopped in the kernel by the + * corresponding mechanism (e.g. job control stop). + */ + if (psp->pr_why == PR_FAULTED && psp->pr_what == FLTBPT) + dt_proc_bpmatch(dtp, dpr); + else if (psp->pr_why == PR_SYSENTRY && + IS_SYS_FORK(psp->pr_what)) + dt_proc_bpdisable(dpr); + else if (psp->pr_why == PR_SYSEXIT && + IS_SYS_FORK(psp->pr_what)) + dt_proc_bpenable(dpr); + else if (psp->pr_why == PR_SYSEXIT && + IS_SYS_EXEC(psp->pr_what)) + dt_proc_attach(dpr, B_TRUE); + break; + + case PS_LOST: +#if defined(sun) + if (Preopen(P) == 0) + goto pwait_locked; +#endif + + dt_dprintf("pid %d: proc lost: %s\n", + pid, strerror(errno)); + + dpr->dpr_quit = B_TRUE; + notify = B_TRUE; + break; + + case PS_UNDEAD: + dt_dprintf("pid %d: proc died\n", pid); + dpr->dpr_quit = B_TRUE; + notify = B_TRUE; + break; + } + + if (Pstate(P) != PS_UNDEAD && Psetrun(P, 0, 0) == -1) { + dt_dprintf("pid %d: failed to set running: %s\n", + (int)dpr->dpr_pid, strerror(errno)); + } + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + } + + /* + * If the control thread detected PS_UNDEAD or PS_LOST, then enqueue + * the dt_proc_t structure on the dt_proc_hash_t notification list. + */ + if (notify) + dt_proc_notify(dtp, dph, dpr, NULL); + + /* + * Destroy and remove any remaining breakpoints, set dpr_done and clear + * dpr_tid to indicate the control thread has exited, and notify any + * waiting thread in dt_proc_destroy() that we have succesfully exited. 
+ */ + (void) pthread_mutex_lock(&dpr->dpr_lock); + + dt_proc_bpdestroy(dpr, B_TRUE); + dpr->dpr_done = B_TRUE; + dpr->dpr_tid = 0; + + (void) pthread_cond_broadcast(&dpr->dpr_cv); + (void) pthread_mutex_unlock(&dpr->dpr_lock); + + return (NULL); +} + +/*PRINTFLIKE3*/ +static struct ps_prochandle * +dt_proc_error(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap); + va_end(ap); + + if (dpr->dpr_proc != NULL) + Prelease(dpr->dpr_proc, 0); + + dt_free(dtp, dpr); + (void) dt_set_errno(dtp, EDT_COMPILER); + return (NULL); +} + +dt_proc_t * +dt_proc_lookup(dtrace_hdl_t *dtp, struct ps_prochandle *P, int remove) +{ + dt_proc_hash_t *dph = dtp->dt_procs; +#if defined(sun) + pid_t pid = Pstatus(P)->pr_pid; +#else + pid_t pid = proc_getpid(P); +#endif + dt_proc_t *dpr, **dpp = &dph->dph_hash[pid & (dph->dph_hashlen - 1)]; + + for (dpr = *dpp; dpr != NULL; dpr = dpr->dpr_hash) { + if (dpr->dpr_pid == pid) + break; + else + dpp = &dpr->dpr_hash; + } + + assert(dpr != NULL); + assert(dpr->dpr_proc == P); + + if (remove) + *dpp = dpr->dpr_hash; /* remove from pid hash chain */ + + return (dpr); +} + +static void +dt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE); + dt_proc_hash_t *dph = dtp->dt_procs; + dt_proc_notify_t *npr, **npp; + int rflag; + + assert(dpr != NULL); + + /* + * If neither PR_KLC nor PR_RLC is set, then the process is stopped by + * an external debugger and we were waiting in dt_proc_waitrun(). + * Leave the process in this condition using PRELEASE_HANG. 
+ */ +#if defined(sun) + if (!(Pstatus(dpr->dpr_proc)->pr_flags & (PR_KLC | PR_RLC))) { +#else + if (!(proc_getflags(dpr->dpr_proc) & (PR_KLC | PR_RLC))) { +#endif + dt_dprintf("abandoning pid %d\n", (int)dpr->dpr_pid); + rflag = PRELEASE_HANG; +#if defined(sun) + } else if (Pstatus(dpr->dpr_proc)->pr_flags & PR_KLC) { +#else + } else if (proc_getflags(dpr->dpr_proc) & PR_KLC) { +#endif + dt_dprintf("killing pid %d\n", (int)dpr->dpr_pid); + rflag = PRELEASE_KILL; /* apply kill-on-last-close */ + } else { + dt_dprintf("releasing pid %d\n", (int)dpr->dpr_pid); + rflag = 0; /* apply run-on-last-close */ + } + + if (dpr->dpr_tid) { + /* + * Set the dpr_quit flag to tell the daemon thread to exit. We + * send it a SIGCANCEL to poke it out of PCWSTOP or any other + * long-term /proc system call. Our daemon threads have POSIX + * cancellation disabled, so EINTR will be the only effect. We + * then wait for dpr_done to indicate the thread has exited. + * + * We can't use pthread_kill() to send SIGCANCEL because the + * interface forbids it and we can't use pthread_cancel() + * because with cancellation disabled it won't actually + * send SIGCANCEL to the target thread, so we use _lwp_kill() + * to do the job. This is all built on evil knowledge of + * the details of the cancellation mechanism in libc. + */ + (void) pthread_mutex_lock(&dpr->dpr_lock); + dpr->dpr_quit = B_TRUE; +#if defined(sun) + (void) _lwp_kill(dpr->dpr_tid, SIGCANCEL); +#else + pthread_kill(dpr->dpr_tid, SIGTHR); +#endif + + /* + * If the process is currently idling in dt_proc_stop(), re- + * enable breakpoints and poke it into running again. 
+ */ + if (dpr->dpr_stop & DT_PROC_STOP_IDLE) { + dt_proc_bpenable(dpr); + dpr->dpr_stop &= ~DT_PROC_STOP_IDLE; + (void) pthread_cond_broadcast(&dpr->dpr_cv); + } + + while (!dpr->dpr_done) + (void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock); + + (void) pthread_mutex_unlock(&dpr->dpr_lock); + } + + /* + * Before we free the process structure, remove this dt_proc_t from the + * lookup hash, and then walk the dt_proc_hash_t's notification list + * and remove this dt_proc_t if it is enqueued. + */ + (void) pthread_mutex_lock(&dph->dph_lock); + (void) dt_proc_lookup(dtp, P, B_TRUE); + npp = &dph->dph_notify; + + while ((npr = *npp) != NULL) { + if (npr->dprn_dpr == dpr) { + *npp = npr->dprn_next; + dt_free(dtp, npr); + } else { + npp = &npr->dprn_next; + } + } + + (void) pthread_mutex_unlock(&dph->dph_lock); + + /* + * Remove the dt_proc_list from the LRU list, release the underlying + * libproc handle, and free our dt_proc_t data structure. + */ + if (dpr->dpr_cacheable) { + assert(dph->dph_lrucnt != 0); + dph->dph_lrucnt--; + } + + dt_list_delete(&dph->dph_lrulist, dpr); + Prelease(dpr->dpr_proc, rflag); + dt_free(dtp, dpr); +} + +static int +dt_proc_create_thread(dtrace_hdl_t *dtp, dt_proc_t *dpr, uint_t stop) +{ + dt_proc_control_data_t data; + sigset_t nset, oset; + pthread_attr_t a; + int err; + + (void) pthread_mutex_lock(&dpr->dpr_lock); + dpr->dpr_stop |= stop; /* set bit for initial rendezvous */ + + (void) pthread_attr_init(&a); + (void) pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED); + + (void) sigfillset(&nset); + (void) sigdelset(&nset, SIGABRT); /* unblocked for assert() */ +#if defined(sun) + (void) sigdelset(&nset, SIGCANCEL); /* see dt_proc_destroy() */ +#else + (void) sigdelset(&nset, SIGUSR1); /* see dt_proc_destroy() */ +#endif + + data.dpcd_hdl = dtp; + data.dpcd_proc = dpr; + + (void) pthread_sigmask(SIG_SETMASK, &nset, &oset); + err = pthread_create(&dpr->dpr_tid, &a, dt_proc_control, &data); + (void) pthread_sigmask(SIG_SETMASK, 
&oset, NULL); + + /* + * If the control thread was created, then wait on dpr_cv for either + * dpr_done to be set (the victim died or the control thread failed) + * or DT_PROC_STOP_IDLE to be set, indicating that the victim is now + * stopped by /proc and the control thread is at the rendezvous event. + * On success, we return with the process and control thread stopped: + * the caller can then apply dt_proc_continue() to resume both. + */ + if (err == 0) { + while (!dpr->dpr_done && !(dpr->dpr_stop & DT_PROC_STOP_IDLE)) + (void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock); + + /* + * If dpr_done is set, the control thread aborted before it + * reached the rendezvous event. This is either due to PS_LOST + * or PS_UNDEAD (i.e. the process died). We try to provide a + * small amount of useful information to help figure it out. + */ + if (dpr->dpr_done) { +#if defined(sun) + const psinfo_t *prp = Ppsinfo(dpr->dpr_proc); + int stat = prp ? prp->pr_wstat : 0; + int pid = dpr->dpr_pid; +#else + int stat = proc_getwstat(dpr->dpr_proc); + int pid = proc_getpid(dpr->dpr_proc); +#endif + if (proc_state(dpr->dpr_proc) == PS_LOST) { + (void) dt_proc_error(dpr->dpr_hdl, dpr, + "failed to control pid %d: process exec'd " + "set-id or unobservable program\n", pid); + } else if (WIFSIGNALED(stat)) { + (void) dt_proc_error(dpr->dpr_hdl, dpr, + "failed to control pid %d: process died " + "from signal %d\n", pid, WTERMSIG(stat)); + } else { + (void) dt_proc_error(dpr->dpr_hdl, dpr, + "failed to control pid %d: process exited " + "with status %d\n", pid, WEXITSTATUS(stat)); + } + + err = ESRCH; /* cause grab() or create() to fail */ + } + } else { + (void) dt_proc_error(dpr->dpr_hdl, dpr, + "failed to create control thread for process-id %d: %s\n", + (int)dpr->dpr_pid, strerror(err)); + } + + if (err == 0) + (void) pthread_mutex_unlock(&dpr->dpr_lock); + (void) pthread_attr_destroy(&a); + + return (err); +} + +struct ps_prochandle * +dt_proc_create(dtrace_hdl_t *dtp, const char 
*file, char *const *argv, + proc_child_func *pcf, void *child_arg) +{ + dt_proc_hash_t *dph = dtp->dt_procs; + dt_proc_t *dpr; + int err; + + if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL) + return (NULL); /* errno is set for us */ + + (void) pthread_mutex_init(&dpr->dpr_lock, NULL); + (void) pthread_cond_init(&dpr->dpr_cv, NULL); + +#if defined(sun) + if ((dpr->dpr_proc = Pcreate(file, argv, &err, NULL, 0)) == NULL) { +#else + if ((err = proc_create(file, argv, pcf, child_arg, + &dpr->dpr_proc)) != 0) { +#endif + return (dt_proc_error(dtp, dpr, + "failed to execute %s: %s\n", file, Pcreate_error(err))); + } + + dpr->dpr_hdl = dtp; +#if defined(sun) + dpr->dpr_pid = Pstatus(dpr->dpr_proc)->pr_pid; +#else + dpr->dpr_pid = proc_getpid(dpr->dpr_proc); +#endif + + (void) Punsetflags(dpr->dpr_proc, PR_RLC); + (void) Psetflags(dpr->dpr_proc, PR_KLC); + + if (dt_proc_create_thread(dtp, dpr, dtp->dt_prcmode) != 0) + return (NULL); /* dt_proc_error() has been called for us */ + + dpr->dpr_hash = dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)]; + dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)] = dpr; + dt_list_prepend(&dph->dph_lrulist, dpr); + + dt_dprintf("created pid %d\n", (int)dpr->dpr_pid); + dpr->dpr_refs++; + + return (dpr->dpr_proc); +} + +struct ps_prochandle * +dt_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags, int nomonitor) +{ + dt_proc_hash_t *dph = dtp->dt_procs; + uint_t h = pid & (dph->dph_hashlen - 1); + dt_proc_t *dpr, *opr; + int err; + + /* + * Search the hash table for the pid. If it is already grabbed or + * created, move the handle to the front of the lrulist, increment + * the reference count, and return the existing ps_prochandle. + */ + for (dpr = dph->dph_hash[h]; dpr != NULL; dpr = dpr->dpr_hash) { + if (dpr->dpr_pid == pid && !dpr->dpr_stale) { + /* + * If the cached handle was opened read-only and + * this request is for a writeable handle, mark + * the cached handle as stale and open a new handle. 
+ * Since it's stale, unmark it as cacheable. + */ + if (dpr->dpr_rdonly && !(flags & PGRAB_RDONLY)) { + dt_dprintf("upgrading pid %d\n", (int)pid); + dpr->dpr_stale = B_TRUE; + dpr->dpr_cacheable = B_FALSE; + dph->dph_lrucnt--; + break; + } + + dt_dprintf("grabbed pid %d (cached)\n", (int)pid); + dt_list_delete(&dph->dph_lrulist, dpr); + dt_list_prepend(&dph->dph_lrulist, dpr); + dpr->dpr_refs++; + return (dpr->dpr_proc); + } + } + + if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL) + return (NULL); /* errno is set for us */ + + (void) pthread_mutex_init(&dpr->dpr_lock, NULL); + (void) pthread_cond_init(&dpr->dpr_cv, NULL); + +#if defined(sun) + if ((dpr->dpr_proc = Pgrab(pid, flags, &err)) == NULL) { +#else + if ((err = proc_attach(pid, flags, &dpr->dpr_proc)) != 0) { +#endif + return (dt_proc_error(dtp, dpr, + "failed to grab pid %d: %s\n", (int)pid, Pgrab_error(err))); + } + + dpr->dpr_hdl = dtp; + dpr->dpr_pid = pid; + + (void) Punsetflags(dpr->dpr_proc, PR_KLC); + (void) Psetflags(dpr->dpr_proc, PR_RLC); + + /* + * If we are attempting to grab the process without a monitor + * thread, then mark the process cacheable only if it's being + * grabbed read-only. If we're currently caching more process + * handles than dph_lrulim permits, attempt to find the + * least-recently-used handle that is currently unreferenced and + * release it from the cache. Otherwise we are grabbing the process + * for control: create a control thread for this process and store + * its ID in dpr->dpr_tid. 
+ */ + if (nomonitor || (flags & PGRAB_RDONLY)) { + if (dph->dph_lrucnt >= dph->dph_lrulim) { + for (opr = dt_list_prev(&dph->dph_lrulist); + opr != NULL; opr = dt_list_prev(opr)) { + if (opr->dpr_cacheable && opr->dpr_refs == 0) { + dt_proc_destroy(dtp, opr->dpr_proc); + break; + } + } + } + + if (flags & PGRAB_RDONLY) { + dpr->dpr_cacheable = B_TRUE; + dpr->dpr_rdonly = B_TRUE; + dph->dph_lrucnt++; + } + + } else if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_GRAB) != 0) + return (NULL); /* dt_proc_error() has been called for us */ + + dpr->dpr_hash = dph->dph_hash[h]; + dph->dph_hash[h] = dpr; + dt_list_prepend(&dph->dph_lrulist, dpr); + + dt_dprintf("grabbed pid %d\n", (int)pid); + dpr->dpr_refs++; + + return (dpr->dpr_proc); +} + +void +dt_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE); + dt_proc_hash_t *dph = dtp->dt_procs; + + assert(dpr != NULL); + assert(dpr->dpr_refs != 0); + + if (--dpr->dpr_refs == 0 && + (!dpr->dpr_cacheable || dph->dph_lrucnt > dph->dph_lrulim)) + dt_proc_destroy(dtp, P); +} + +void +dt_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE); + + (void) pthread_mutex_lock(&dpr->dpr_lock); + + if (dpr->dpr_stop & DT_PROC_STOP_IDLE) { + dpr->dpr_stop &= ~DT_PROC_STOP_IDLE; + (void) pthread_cond_broadcast(&dpr->dpr_cv); + } + + (void) pthread_mutex_unlock(&dpr->dpr_lock); +} + +void +dt_proc_lock(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE); + int err = pthread_mutex_lock(&dpr->dpr_lock); + assert(err == 0); /* check for recursion */ +} + +void +dt_proc_unlock(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE); + int err = pthread_mutex_unlock(&dpr->dpr_lock); + assert(err == 0); /* check for unheld lock */ +} + +void +dt_proc_hash_create(dtrace_hdl_t *dtp) +{ + if ((dtp->dt_procs = dt_zalloc(dtp, sizeof 
(dt_proc_hash_t) + + sizeof (dt_proc_t *) * _dtrace_pidbuckets - 1)) != NULL) { + + (void) pthread_mutex_init(&dtp->dt_procs->dph_lock, NULL); + (void) pthread_cond_init(&dtp->dt_procs->dph_cv, NULL); + + dtp->dt_procs->dph_hashlen = _dtrace_pidbuckets; + dtp->dt_procs->dph_lrulim = _dtrace_pidlrulim; + } +} + +void +dt_proc_hash_destroy(dtrace_hdl_t *dtp) +{ + dt_proc_hash_t *dph = dtp->dt_procs; + dt_proc_t *dpr; + + while ((dpr = dt_list_next(&dph->dph_lrulist)) != NULL) + dt_proc_destroy(dtp, dpr->dpr_proc); + + dtp->dt_procs = NULL; + dt_free(dtp, dph); +} + +struct ps_prochandle * +dtrace_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv, + proc_child_func *pcf, void *child_arg) +{ + dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target"); + struct ps_prochandle *P = dt_proc_create(dtp, file, argv, pcf, child_arg); + + if (P != NULL && idp != NULL && idp->di_id == 0) { +#if defined(sun) + idp->di_id = Pstatus(P)->pr_pid; /* $target = created pid */ +#else + idp->di_id = proc_getpid(P); /* $target = created pid */ +#endif + } + + return (P); +} + +struct ps_prochandle * +dtrace_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags) +{ + dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target"); + struct ps_prochandle *P = dt_proc_grab(dtp, pid, flags, 0); + + if (P != NULL && idp != NULL && idp->di_id == 0) + idp->di_id = pid; /* $target = grabbed pid */ + + return (P); +} + +void +dtrace_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_release(dtp, P); +} + +void +dtrace_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P) +{ + dt_proc_continue(dtp, P); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.h new file mode 100644 index 0000000..d1fc765 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.h @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * 
Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DT_PROC_H +#define _DT_PROC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libproc.h> +#include <dtrace.h> +#include <pthread.h> +#include <dt_list.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct dt_proc { + dt_list_t dpr_list; /* prev/next pointers for lru chain */ + struct dt_proc *dpr_hash; /* next pointer for pid hash chain */ + dtrace_hdl_t *dpr_hdl; /* back pointer to libdtrace handle */ + struct ps_prochandle *dpr_proc; /* proc handle for libproc calls */ + char dpr_errmsg[BUFSIZ]; /* error message */ + rd_agent_t *dpr_rtld; /* rtld handle for librtld_db calls */ + pthread_mutex_t dpr_lock; /* lock for manipulating dpr_hdl */ + pthread_cond_t dpr_cv; /* cond for dpr_stop/quit/done */ + pid_t dpr_pid; /* pid of process */ + uint_t dpr_refs; /* reference count */ + uint8_t dpr_cacheable; /* cache handle using lru list */ + uint8_t dpr_stop; /* stop mask: see flag bits below */ + uint8_t dpr_quit; /* quit flag: ctl thread should quit */ + uint8_t dpr_done; /* done flag: ctl thread has exited */ + uint8_t dpr_usdt; /* usdt flag: usdt initialized */ + uint8_t dpr_stale; /* proc flag: been 
deprecated */ + uint8_t dpr_rdonly; /* proc flag: opened read-only */ + pthread_t dpr_tid; /* control thread (or zero if none) */ + dt_list_t dpr_bps; /* list of dt_bkpt_t structures */ +} dt_proc_t; + +typedef struct dt_proc_notify { + dt_proc_t *dprn_dpr; /* process associated with the event */ + char dprn_errmsg[BUFSIZ]; /* error message */ + struct dt_proc_notify *dprn_next; /* next pointer */ +} dt_proc_notify_t; + +#define DT_PROC_STOP_IDLE 0x01 /* idle on owner's stop request */ +#define DT_PROC_STOP_CREATE 0x02 /* wait on dpr_cv at process exec */ +#define DT_PROC_STOP_GRAB 0x04 /* wait on dpr_cv at process grab */ +#define DT_PROC_STOP_PREINIT 0x08 /* wait on dpr_cv at rtld preinit */ +#define DT_PROC_STOP_POSTINIT 0x10 /* wait on dpr_cv at rtld postinit */ +#define DT_PROC_STOP_MAIN 0x20 /* wait on dpr_cv at a.out`main() */ + +typedef void dt_bkpt_f(dtrace_hdl_t *, dt_proc_t *, void *); + +typedef struct dt_bkpt { + dt_list_t dbp_list; /* prev/next pointers for bkpt list */ + dt_bkpt_f *dbp_func; /* callback function to execute */ + void *dbp_data; /* callback function private data */ + uintptr_t dbp_addr; /* virtual address of breakpoint */ + ulong_t dbp_instr; /* saved instruction from breakpoint */ + ulong_t dbp_hits; /* count of breakpoint hits for debug */ + int dbp_active; /* flag indicating breakpoint is on */ +} dt_bkpt_t; + +typedef struct dt_proc_hash { + pthread_mutex_t dph_lock; /* lock protecting dph_notify list */ + pthread_cond_t dph_cv; /* cond for waiting for dph_notify */ + dt_proc_notify_t *dph_notify; /* list of pending proc notifications */ + dt_list_t dph_lrulist; /* list of dt_proc_t's in lru order */ + uint_t dph_lrulim; /* limit on number of procs to hold */ + uint_t dph_lrucnt; /* count of cached process handles */ + uint_t dph_hashlen; /* size of hash chains array */ + dt_proc_t *dph_hash[1]; /* hash chains array */ +} dt_proc_hash_t; + +extern struct ps_prochandle *dt_proc_create(dtrace_hdl_t *, + const char *, char *const *, 
proc_child_func *, void *); + +extern struct ps_prochandle *dt_proc_grab(dtrace_hdl_t *, pid_t, int, int); +extern void dt_proc_release(dtrace_hdl_t *, struct ps_prochandle *); +extern void dt_proc_continue(dtrace_hdl_t *, struct ps_prochandle *); +extern void dt_proc_lock(dtrace_hdl_t *, struct ps_prochandle *); +extern void dt_proc_unlock(dtrace_hdl_t *, struct ps_prochandle *); +extern dt_proc_t *dt_proc_lookup(dtrace_hdl_t *, struct ps_prochandle *, int); + +extern void dt_proc_hash_create(dtrace_hdl_t *); +extern void dt_proc_hash_destroy(dtrace_hdl_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_PROC_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_program.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_program.c new file mode 100644 index 0000000..a325c42 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_program.c @@ -0,0 +1,626 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. 
+ */ + +#include <unistd.h> +#include <strings.h> +#include <stdlib.h> +#include <errno.h> +#include <assert.h> +#include <ctype.h> +#if defined(sun) +#include <alloca.h> +#endif + +#include <dt_impl.h> +#include <dt_program.h> +#include <dt_printf.h> +#include <dt_provider.h> + +dtrace_prog_t * +dt_program_create(dtrace_hdl_t *dtp) +{ + dtrace_prog_t *pgp = dt_zalloc(dtp, sizeof (dtrace_prog_t)); + + if (pgp != NULL) { + dt_list_append(&dtp->dt_programs, pgp); + } else { + (void) dt_set_errno(dtp, EDT_NOMEM); + return (NULL); + } + + /* + * By default, programs start with DOF version 1 so that output files + * containing DOF are backward compatible. If a program requires new + * DOF features, the version is increased as needed. + */ + pgp->dp_dofversion = DOF_VERSION_1; + + return (pgp); +} + +void +dt_program_destroy(dtrace_hdl_t *dtp, dtrace_prog_t *pgp) +{ + dt_stmt_t *stp, *next; + uint_t i; + + for (stp = dt_list_next(&pgp->dp_stmts); stp != NULL; stp = next) { + next = dt_list_next(stp); + dtrace_stmt_destroy(dtp, stp->ds_desc); + dt_free(dtp, stp); + } + + for (i = 0; i < pgp->dp_xrefslen; i++) + dt_free(dtp, pgp->dp_xrefs[i]); + + dt_free(dtp, pgp->dp_xrefs); + dt_list_delete(&dtp->dt_programs, pgp); + dt_free(dtp, pgp); +} + +/*ARGSUSED*/ +void +dtrace_program_info(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, + dtrace_proginfo_t *pip) +{ + dt_stmt_t *stp; + dtrace_actdesc_t *ap; + dtrace_ecbdesc_t *last = NULL; + + if (pip == NULL) + return; + + bzero(pip, sizeof (dtrace_proginfo_t)); + + if (dt_list_next(&pgp->dp_stmts) != NULL) { + pip->dpi_descattr = _dtrace_maxattr; + pip->dpi_stmtattr = _dtrace_maxattr; + } else { + pip->dpi_descattr = _dtrace_defattr; + pip->dpi_stmtattr = _dtrace_defattr; + } + + for (stp = dt_list_next(&pgp->dp_stmts); stp; stp = dt_list_next(stp)) { + dtrace_ecbdesc_t *edp = stp->ds_desc->dtsd_ecbdesc; + + if (edp == last) + continue; + last = edp; + + pip->dpi_descattr = + dt_attr_min(stp->ds_desc->dtsd_descattr, pip->dpi_descattr); + 
+ pip->dpi_stmtattr = + dt_attr_min(stp->ds_desc->dtsd_stmtattr, pip->dpi_stmtattr); + + /* + * If there aren't any actions, account for the fact that + * recording the epid will generate a record. + */ + if (edp->dted_action == NULL) + pip->dpi_recgens++; + + for (ap = edp->dted_action; ap != NULL; ap = ap->dtad_next) { + if (ap->dtad_kind == DTRACEACT_SPECULATE) { + pip->dpi_speculations++; + continue; + } + + if (DTRACEACT_ISAGG(ap->dtad_kind)) { + pip->dpi_recgens -= ap->dtad_arg; + pip->dpi_aggregates++; + continue; + } + + if (DTRACEACT_ISDESTRUCTIVE(ap->dtad_kind)) + continue; + + if (ap->dtad_kind == DTRACEACT_DIFEXPR && + ap->dtad_difo->dtdo_rtype.dtdt_kind == + DIF_TYPE_CTF && + ap->dtad_difo->dtdo_rtype.dtdt_size == 0) + continue; + + pip->dpi_recgens++; + } + } +} + +int +dtrace_program_exec(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, + dtrace_proginfo_t *pip) +{ + dtrace_enable_io_t args; + void *dof; + int n, err; + + dtrace_program_info(dtp, pgp, pip); + + if ((dof = dtrace_dof_create(dtp, pgp, DTRACE_D_STRIP)) == NULL) + return (-1); + + args.dof = dof; + args.n_matched = 0; + n = dt_ioctl(dtp, DTRACEIOC_ENABLE, &args); + dtrace_dof_destroy(dtp, dof); + + if (n == -1) { + switch (errno) { + case EINVAL: + err = EDT_DIFINVAL; + break; + case EFAULT: + err = EDT_DIFFAULT; + break; + case E2BIG: + err = EDT_DIFSIZE; + break; + case EBUSY: + err = EDT_ENABLING_ERR; + break; + default: + err = errno; + } + + return (dt_set_errno(dtp, err)); + } + + if (pip != NULL) + pip->dpi_matches += args.n_matched; + + return (0); +} + +static void +dt_ecbdesc_hold(dtrace_ecbdesc_t *edp) +{ + edp->dted_refcnt++; +} + +void +dt_ecbdesc_release(dtrace_hdl_t *dtp, dtrace_ecbdesc_t *edp) +{ + if (--edp->dted_refcnt > 0) + return; + + dt_difo_free(dtp, edp->dted_pred.dtpdd_difo); + assert(edp->dted_action == NULL); + dt_free(dtp, edp); +} + +dtrace_ecbdesc_t * +dt_ecbdesc_create(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp) +{ + dtrace_ecbdesc_t *edp; + + if ((edp = 
dt_zalloc(dtp, sizeof (dtrace_ecbdesc_t))) == NULL) { + (void) dt_set_errno(dtp, EDT_NOMEM); + return (NULL); + } + + edp->dted_probe = *pdp; + dt_ecbdesc_hold(edp); + return (edp); +} + +dtrace_stmtdesc_t * +dtrace_stmt_create(dtrace_hdl_t *dtp, dtrace_ecbdesc_t *edp) +{ + dtrace_stmtdesc_t *sdp; + + if ((sdp = dt_zalloc(dtp, sizeof (dtrace_stmtdesc_t))) == NULL) + return (NULL); + + dt_ecbdesc_hold(edp); + sdp->dtsd_ecbdesc = edp; + sdp->dtsd_descattr = _dtrace_defattr; + sdp->dtsd_stmtattr = _dtrace_defattr; + + return (sdp); +} + +dtrace_actdesc_t * +dtrace_stmt_action(dtrace_hdl_t *dtp, dtrace_stmtdesc_t *sdp) +{ + dtrace_actdesc_t *new; + dtrace_ecbdesc_t *edp = sdp->dtsd_ecbdesc; + + if ((new = dt_alloc(dtp, sizeof (dtrace_actdesc_t))) == NULL) + return (NULL); + + if (sdp->dtsd_action_last != NULL) { + assert(sdp->dtsd_action != NULL); + assert(sdp->dtsd_action_last->dtad_next == NULL); + sdp->dtsd_action_last->dtad_next = new; + } else { + dtrace_actdesc_t *ap = edp->dted_action; + + assert(sdp->dtsd_action == NULL); + sdp->dtsd_action = new; + + while (ap != NULL && ap->dtad_next != NULL) + ap = ap->dtad_next; + + if (ap == NULL) + edp->dted_action = new; + else + ap->dtad_next = new; + } + + sdp->dtsd_action_last = new; + bzero(new, sizeof (dtrace_actdesc_t)); + new->dtad_uarg = (uintptr_t)sdp; + + return (new); +} + +int +dtrace_stmt_add(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, dtrace_stmtdesc_t *sdp) +{ + dt_stmt_t *stp = dt_alloc(dtp, sizeof (dt_stmt_t)); + + if (stp == NULL) + return (-1); /* errno is set for us */ + + dt_list_append(&pgp->dp_stmts, stp); + stp->ds_desc = sdp; + + return (0); +} + +int +dtrace_stmt_iter(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, + dtrace_stmt_f *func, void *data) +{ + dt_stmt_t *stp, *next; + int status = 0; + + for (stp = dt_list_next(&pgp->dp_stmts); stp != NULL; stp = next) { + next = dt_list_next(stp); + if ((status = func(dtp, pgp, stp->ds_desc, data)) != 0) + break; + } + + return (status); +} + +void 
+dtrace_stmt_destroy(dtrace_hdl_t *dtp, dtrace_stmtdesc_t *sdp) +{ + dtrace_ecbdesc_t *edp = sdp->dtsd_ecbdesc; + + /* + * We need to remove any actions that we have on this ECB, and + * remove our hold on the ECB itself. + */ + if (sdp->dtsd_action != NULL) { + dtrace_actdesc_t *last = sdp->dtsd_action_last; + dtrace_actdesc_t *ap, *next; + + assert(last != NULL); + + for (ap = edp->dted_action; ap != NULL; ap = ap->dtad_next) { + if (ap == sdp->dtsd_action) + break; + + if (ap->dtad_next == sdp->dtsd_action) + break; + } + + assert(ap != NULL); + + if (ap == edp->dted_action) + edp->dted_action = last->dtad_next; + else + ap->dtad_next = last->dtad_next; + + /* + * We have now removed our action list from its ECB; we can + * safely destroy the list. + */ + last->dtad_next = NULL; + + for (ap = sdp->dtsd_action; ap != NULL; ap = next) { + assert(ap->dtad_uarg == (uintptr_t)sdp); + dt_difo_free(dtp, ap->dtad_difo); + next = ap->dtad_next; + dt_free(dtp, ap); + } + } + + if (sdp->dtsd_fmtdata != NULL) + dt_printf_destroy(sdp->dtsd_fmtdata); + dt_free(dtp, sdp->dtsd_strdata); + + dt_ecbdesc_release(dtp, sdp->dtsd_ecbdesc); + dt_free(dtp, sdp); +} + +typedef struct dt_header_info { + dtrace_hdl_t *dthi_dtp; /* consumer handle */ + FILE *dthi_out; /* output file */ + char *dthi_pmname; /* provider macro name */ + char *dthi_pfname; /* provider function name */ + int dthi_empty; /* should we generate empty macros */ +} dt_header_info_t; + +static void +dt_header_fmt_macro(char *buf, const char *str) +{ + for (;;) { + if (islower(*str)) { + *buf++ = *str++ + 'A' - 'a'; + } else if (*str == '-') { + *buf++ = '_'; + str++; + } else if (*str == '.') { + *buf++ = '_'; + str++; + } else if ((*buf++ = *str++) == '\0') { + break; + } + } +} + +static void +dt_header_fmt_func(char *buf, const char *str) +{ + for (;;) { + if (*str == '-') { + *buf++ = '_'; + *buf++ = '_'; + str++; + } else if ((*buf++ = *str++) == '\0') { + break; + } + } +} + +/*ARGSUSED*/ +static int 
+dt_header_decl(dt_idhash_t *dhp, dt_ident_t *idp, void *data) +{ + dt_header_info_t *infop = data; + dtrace_hdl_t *dtp = infop->dthi_dtp; + dt_probe_t *prp = idp->di_data; + dt_node_t *dnp; + char buf[DT_TYPE_NAMELEN]; + char *fname; + const char *p; + int i; + + p = prp->pr_name; + for (i = 0; (p = strchr(p, '-')) != NULL; i++) + p++; + + fname = alloca(strlen(prp->pr_name) + 1 + i); + dt_header_fmt_func(fname, prp->pr_name); + + if (fprintf(infop->dthi_out, "extern void __dtrace_%s___%s(", + infop->dthi_pfname, fname) < 0) + return (dt_set_errno(dtp, errno)); + + for (dnp = prp->pr_nargs, i = 0; dnp != NULL; dnp = dnp->dn_list, i++) { + if (fprintf(infop->dthi_out, "%s", + ctf_type_name(dnp->dn_ctfp, dnp->dn_type, + buf, sizeof (buf))) < 0) + return (dt_set_errno(dtp, errno)); + + if (i + 1 != prp->pr_nargc && + fprintf(infop->dthi_out, ", ") < 0) + return (dt_set_errno(dtp, errno)); + } + + if (i == 0 && fprintf(infop->dthi_out, "void") < 0) + return (dt_set_errno(dtp, errno)); + + if (fprintf(infop->dthi_out, ");\n") < 0) + return (dt_set_errno(dtp, errno)); + + if (fprintf(infop->dthi_out, + "#ifndef\t__sparc\n" + "extern int __dtraceenabled_%s___%s(void);\n" + "#else\n" + "extern int __dtraceenabled_%s___%s(long);\n" + "#endif\n", + infop->dthi_pfname, fname, infop->dthi_pfname, fname) < 0) + return (dt_set_errno(dtp, errno)); + + return (0); +} + +/*ARGSUSED*/ +static int +dt_header_probe(dt_idhash_t *dhp, dt_ident_t *idp, void *data) +{ + dt_header_info_t *infop = data; + dtrace_hdl_t *dtp = infop->dthi_dtp; + dt_probe_t *prp = idp->di_data; + char *mname, *fname; + const char *p; + int i; + + p = prp->pr_name; + for (i = 0; (p = strchr(p, '-')) != NULL; i++) + p++; + + mname = alloca(strlen(prp->pr_name) + 1); + dt_header_fmt_macro(mname, prp->pr_name); + + fname = alloca(strlen(prp->pr_name) + 1 + i); + dt_header_fmt_func(fname, prp->pr_name); + + if (fprintf(infop->dthi_out, "#define\t%s_%s(", + infop->dthi_pmname, mname) < 0) + return 
(dt_set_errno(dtp, errno)); + + for (i = 0; i < prp->pr_nargc; i++) { + if (fprintf(infop->dthi_out, "arg%d", i) < 0) + return (dt_set_errno(dtp, errno)); + + if (i + 1 != prp->pr_nargc && + fprintf(infop->dthi_out, ", ") < 0) + return (dt_set_errno(dtp, errno)); + } + + if (!infop->dthi_empty) { + if (fprintf(infop->dthi_out, ") \\\n\t") < 0) + return (dt_set_errno(dtp, errno)); + + if (fprintf(infop->dthi_out, "__dtrace_%s___%s(", + infop->dthi_pfname, fname) < 0) + return (dt_set_errno(dtp, errno)); + + for (i = 0; i < prp->pr_nargc; i++) { + if (fprintf(infop->dthi_out, "arg%d", i) < 0) + return (dt_set_errno(dtp, errno)); + + if (i + 1 != prp->pr_nargc && + fprintf(infop->dthi_out, ", ") < 0) + return (dt_set_errno(dtp, errno)); + } + } + + if (fprintf(infop->dthi_out, ")\n") < 0) + return (dt_set_errno(dtp, errno)); + + if (!infop->dthi_empty) { + if (fprintf(infop->dthi_out, + "#ifndef\t__sparc\n" + "#define\t%s_%s_ENABLED() \\\n" + "\t__dtraceenabled_%s___%s()\n" + "#else\n" + "#define\t%s_%s_ENABLED() \\\n" + "\t__dtraceenabled_%s___%s(0)\n" + "#endif\n", + infop->dthi_pmname, mname, + infop->dthi_pfname, fname, + infop->dthi_pmname, mname, + infop->dthi_pfname, fname) < 0) + return (dt_set_errno(dtp, errno)); + + } else { + if (fprintf(infop->dthi_out, "#define\t%s_%s_ENABLED() (0)\n", + infop->dthi_pmname, mname) < 0) + return (dt_set_errno(dtp, errno)); + } + + return (0); +} + +static int +dt_header_provider(dtrace_hdl_t *dtp, dt_provider_t *pvp, FILE *out) +{ + dt_header_info_t info; + const char *p; + int i; + + if (pvp->pv_flags & DT_PROVIDER_IMPL) + return (0); + + /* + * Count the instances of the '-' character since we'll need to double + * those up. 
+ */ + p = pvp->pv_desc.dtvd_name; + for (i = 0; (p = strchr(p, '-')) != NULL; i++) + p++; + + info.dthi_dtp = dtp; + info.dthi_out = out; + info.dthi_empty = 0; + + info.dthi_pmname = alloca(strlen(pvp->pv_desc.dtvd_name) + 1); + dt_header_fmt_macro(info.dthi_pmname, pvp->pv_desc.dtvd_name); + + info.dthi_pfname = alloca(strlen(pvp->pv_desc.dtvd_name) + 1 + i); + dt_header_fmt_func(info.dthi_pfname, pvp->pv_desc.dtvd_name); + +#ifdef __FreeBSD__ + if (fprintf(out, "#include <sys/sdt.h>\n\n") < 0) + return (dt_set_errno(dtp, errno)); +#endif + if (fprintf(out, "#if _DTRACE_VERSION\n\n") < 0) + return (dt_set_errno(dtp, errno)); + + if (dt_idhash_iter(pvp->pv_probes, dt_header_probe, &info) != 0) + return (-1); /* dt_errno is set for us */ + if (fprintf(out, "\n\n") < 0) + return (dt_set_errno(dtp, errno)); + if (dt_idhash_iter(pvp->pv_probes, dt_header_decl, &info) != 0) + return (-1); /* dt_errno is set for us */ + + if (fprintf(out, "\n#else\n\n") < 0) + return (dt_set_errno(dtp, errno)); + + info.dthi_empty = 1; + + if (dt_idhash_iter(pvp->pv_probes, dt_header_probe, &info) != 0) + return (-1); /* dt_errno is set for us */ + + if (fprintf(out, "\n#endif\n\n") < 0) + return (dt_set_errno(dtp, errno)); + + return (0); +} + +int +dtrace_program_header(dtrace_hdl_t *dtp, FILE *out, const char *fname) +{ + dt_provider_t *pvp; + char *mfname, *p; + + if (fname != NULL) { + if ((p = strrchr(fname, '/')) != NULL) + fname = p + 1; + + mfname = alloca(strlen(fname) + 1); + dt_header_fmt_macro(mfname, fname); + if (fprintf(out, "#ifndef\t_%s\n#define\t_%s\n\n", + mfname, mfname) < 0) + return (dt_set_errno(dtp, errno)); + } + + if (fprintf(out, "#include <unistd.h>\n\n") < 0) + return (-1); + + if (fprintf(out, "#ifdef\t__cplusplus\nextern \"C\" {\n#endif\n\n") < 0) + return (-1); + + for (pvp = dt_list_next(&dtp->dt_provlist); + pvp != NULL; pvp = dt_list_next(pvp)) { + if (dt_header_provider(dtp, pvp, out) != 0) + return (-1); /* dt_errno is set for us */ + } + + if 
(fprintf(out, "\n#ifdef\t__cplusplus\n}\n#endif\n") < 0) + return (dt_set_errno(dtp, errno)); + + if (fname != NULL && fprintf(out, "\n#endif\t/* _%s */\n", mfname) < 0) + return (dt_set_errno(dtp, errno)); + + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_program.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_program.h new file mode 100644 index 0000000..3fe1c39 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_program.h @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
 */

#ifndef _DT_PROGRAM_H
#define _DT_PROGRAM_H

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef __cplusplus
extern "C" {
#endif

#include <dtrace.h>
#include <dt_list.h>

/*
 * One node of a program's statement list (dtrace_prog.dp_stmts); it wraps a
 * pointer to the statement description.
 */
typedef struct dt_stmt {
	dt_list_t ds_list;	/* list forward/back pointers */
	dtrace_stmtdesc_t *ds_desc;	/* pointer to statement description */
} dt_stmt_t;

/*
 * A program: a list of statements plus the translator reference bitmaps and
 * the DOF version it requires.
 */
struct dtrace_prog {
	dt_list_t dp_list;	/* list forward/back pointers */
	dt_list_t dp_stmts;	/* linked list of dt_stmt_t's */
	ulong_t **dp_xrefs;	/* array of translator reference bitmaps */
	uint_t dp_xrefslen;	/* length of dp_xrefs array */
	uint8_t dp_dofversion;	/* DOF version this program requires */
};

/* Program allocation and teardown. */
extern dtrace_prog_t *dt_program_create(dtrace_hdl_t *);
extern void dt_program_destroy(dtrace_hdl_t *, dtrace_prog_t *);

/* ECB description creation and reference release. */
extern dtrace_ecbdesc_t *dt_ecbdesc_create(dtrace_hdl_t *,
    const dtrace_probedesc_t *);
extern void dt_ecbdesc_release(dtrace_hdl_t *, dtrace_ecbdesc_t *);

#ifdef __cplusplus
}
#endif

#endif /* _DT_PROGRAM_H */
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c
new file mode 100644
index 0000000..cd8ceb6
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c
@@ -0,0 +1,902 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#include <sys/types.h> +#if defined(sun) +#include <sys/sysmacros.h> +#endif + +#include <assert.h> +#include <limits.h> +#include <strings.h> +#include <stdlib.h> +#if defined(sun) +#include <alloca.h> +#endif +#include <unistd.h> +#include <errno.h> + +#include <dt_provider.h> +#include <dt_module.h> +#include <dt_string.h> +#include <dt_list.h> +#include <dt_pid.h> +#include <dtrace.h> + +static dt_provider_t * +dt_provider_insert(dtrace_hdl_t *dtp, dt_provider_t *pvp, uint_t h) +{ + dt_list_append(&dtp->dt_provlist, pvp); + + pvp->pv_next = dtp->dt_provs[h]; + dtp->dt_provs[h] = pvp; + dtp->dt_nprovs++; + + return (pvp); +} + +dt_provider_t * +dt_provider_lookup(dtrace_hdl_t *dtp, const char *name) +{ + uint_t h = dt_strtab_hash(name, NULL) % dtp->dt_provbuckets; + dtrace_providerdesc_t desc; + dt_provider_t *pvp; + + for (pvp = dtp->dt_provs[h]; pvp != NULL; pvp = pvp->pv_next) { + if (strcmp(pvp->pv_desc.dtvd_name, name) == 0) + return (pvp); + } + + if (strisglob(name) || name[0] == '\0') { + (void) dt_set_errno(dtp, EDT_NOPROV); + return (NULL); + } + + bzero(&desc, sizeof (desc)); + (void) strlcpy(desc.dtvd_name, name, DTRACE_PROVNAMELEN); + + if (dt_ioctl(dtp, DTRACEIOC_PROVIDER, &desc) == -1) { + (void) dt_set_errno(dtp, errno == ESRCH ? 
EDT_NOPROV : errno); + return (NULL); + } + + if ((pvp = dt_provider_create(dtp, name)) == NULL) + return (NULL); /* dt_errno is set for us */ + + bcopy(&desc, &pvp->pv_desc, sizeof (desc)); + pvp->pv_flags |= DT_PROVIDER_IMPL; + return (pvp); +} + +dt_provider_t * +dt_provider_create(dtrace_hdl_t *dtp, const char *name) +{ + dt_provider_t *pvp; + + if ((pvp = dt_zalloc(dtp, sizeof (dt_provider_t))) == NULL) + return (NULL); + + (void) strlcpy(pvp->pv_desc.dtvd_name, name, DTRACE_PROVNAMELEN); + pvp->pv_probes = dt_idhash_create(pvp->pv_desc.dtvd_name, NULL, 0, 0); + pvp->pv_gen = dtp->dt_gen; + pvp->pv_hdl = dtp; + + if (pvp->pv_probes == NULL) { + dt_free(dtp, pvp); + (void) dt_set_errno(dtp, EDT_NOMEM); + return (NULL); + } + + pvp->pv_desc.dtvd_attr.dtpa_provider = _dtrace_prvattr; + pvp->pv_desc.dtvd_attr.dtpa_mod = _dtrace_prvattr; + pvp->pv_desc.dtvd_attr.dtpa_func = _dtrace_prvattr; + pvp->pv_desc.dtvd_attr.dtpa_name = _dtrace_prvattr; + pvp->pv_desc.dtvd_attr.dtpa_args = _dtrace_prvattr; + + return (dt_provider_insert(dtp, pvp, + dt_strtab_hash(name, NULL) % dtp->dt_provbuckets)); +} + +void +dt_provider_destroy(dtrace_hdl_t *dtp, dt_provider_t *pvp) +{ + dt_provider_t **pp; + uint_t h; + + assert(pvp->pv_hdl == dtp); + + h = dt_strtab_hash(pvp->pv_desc.dtvd_name, NULL) % dtp->dt_provbuckets; + pp = &dtp->dt_provs[h]; + + while (*pp != NULL && *pp != pvp) + pp = &(*pp)->pv_next; + + assert(*pp != NULL && *pp == pvp); + *pp = pvp->pv_next; + + dt_list_delete(&dtp->dt_provlist, pvp); + dtp->dt_nprovs--; + + if (pvp->pv_probes != NULL) + dt_idhash_destroy(pvp->pv_probes); + + dt_node_link_free(&pvp->pv_nodes); + dt_free(dtp, pvp->pv_xrefs); + dt_free(dtp, pvp); +} + +int +dt_provider_xref(dtrace_hdl_t *dtp, dt_provider_t *pvp, id_t id) +{ + size_t oldsize = BT_SIZEOFMAP(pvp->pv_xrmax); + size_t newsize = BT_SIZEOFMAP(dtp->dt_xlatorid); + + assert(id >= 0 && id < dtp->dt_xlatorid); + + if (newsize > oldsize) { + ulong_t *xrefs = dt_zalloc(dtp, newsize); + + if 
(xrefs == NULL) + return (-1); + + bcopy(pvp->pv_xrefs, xrefs, oldsize); + dt_free(dtp, pvp->pv_xrefs); + + pvp->pv_xrefs = xrefs; + pvp->pv_xrmax = dtp->dt_xlatorid; + } + + BT_SET(pvp->pv_xrefs, id); + return (0); +} + +static uint8_t +dt_probe_argmap(dt_node_t *xnp, dt_node_t *nnp) +{ + uint8_t i; + + for (i = 0; nnp != NULL; i++) { + if (nnp->dn_string != NULL && + strcmp(nnp->dn_string, xnp->dn_string) == 0) + break; + else + nnp = nnp->dn_list; + } + + return (i); +} + +static dt_node_t * +dt_probe_alloc_args(dt_provider_t *pvp, int argc) +{ + dt_node_t *args = NULL, *pnp = NULL, *dnp; + int i; + + for (i = 0; i < argc; i++, pnp = dnp) { + if ((dnp = dt_node_xalloc(pvp->pv_hdl, DT_NODE_TYPE)) == NULL) + return (NULL); + + dnp->dn_link = pvp->pv_nodes; + pvp->pv_nodes = dnp; + + if (args == NULL) + args = dnp; + else + pnp->dn_list = dnp; + } + + return (args); +} + +static size_t +dt_probe_keylen(const dtrace_probedesc_t *pdp) +{ + return (strlen(pdp->dtpd_mod) + 1 + + strlen(pdp->dtpd_func) + 1 + strlen(pdp->dtpd_name) + 1); +} + +static char * +dt_probe_key(const dtrace_probedesc_t *pdp, char *s) +{ + (void) snprintf(s, INT_MAX, "%s:%s:%s", + pdp->dtpd_mod, pdp->dtpd_func, pdp->dtpd_name); + return (s); +} + +/* + * If a probe was discovered from the kernel, ask dtrace(7D) for a description + * of each of its arguments, including native and translated types. 
+ */ +static dt_probe_t * +dt_probe_discover(dt_provider_t *pvp, const dtrace_probedesc_t *pdp) +{ + dtrace_hdl_t *dtp = pvp->pv_hdl; + char *name = dt_probe_key(pdp, alloca(dt_probe_keylen(pdp))); + + dt_node_t *xargs, *nargs; + dt_ident_t *idp; + dt_probe_t *prp; + + dtrace_typeinfo_t dtt; + int i, nc, xc; + + int adc = _dtrace_argmax; + dtrace_argdesc_t *adv = alloca(sizeof (dtrace_argdesc_t) * adc); + dtrace_argdesc_t *adp = adv; + + assert(strcmp(pvp->pv_desc.dtvd_name, pdp->dtpd_provider) == 0); + assert(pdp->dtpd_id != DTRACE_IDNONE); + + dt_dprintf("discovering probe %s:%s id=%d\n", + pvp->pv_desc.dtvd_name, name, pdp->dtpd_id); + + for (nc = -1, i = 0; i < adc; i++, adp++) { + bzero(adp, sizeof (dtrace_argdesc_t)); + adp->dtargd_ndx = i; + adp->dtargd_id = pdp->dtpd_id; + + if (dt_ioctl(dtp, DTRACEIOC_PROBEARG, adp) != 0) { + (void) dt_set_errno(dtp, errno); + return (NULL); + } + + if (adp->dtargd_ndx == DTRACE_ARGNONE) + break; /* all argument descs have been retrieved */ + + nc = MAX(nc, adp->dtargd_mapping); + } + + xc = i; + nc++; + + /* + * The pid provider believes in giving the kernel a break. No reason to + * give the kernel all the ctf containers that we're keeping ourselves + * just to get it back from it. So if we're coming from a pid provider + * probe and the kernel gave us no argument information we'll get some + * here. If for some crazy reason the kernel knows about our userland + * types then we just ignore this. + */ + if (xc == 0 && nc == 0 && + strncmp(pvp->pv_desc.dtvd_name, "pid", 3) == 0) { + nc = adc; + dt_pid_get_types(dtp, pdp, adv, &nc); + xc = nc; + } + + /* + * Now that we have discovered the number of native and translated + * arguments from the argument descriptions, allocate a new probe ident + * and corresponding dt_probe_t and hash it into the provider. 
+ */ + xargs = dt_probe_alloc_args(pvp, xc); + nargs = dt_probe_alloc_args(pvp, nc); + + if ((xc != 0 && xargs == NULL) || (nc != 0 && nargs == NULL)) + return (NULL); /* dt_errno is set for us */ + + idp = dt_ident_create(name, DT_IDENT_PROBE, + DT_IDFLG_ORPHAN, pdp->dtpd_id, _dtrace_defattr, 0, + &dt_idops_probe, NULL, dtp->dt_gen); + + if (idp == NULL) { + (void) dt_set_errno(dtp, EDT_NOMEM); + return (NULL); + } + + if ((prp = dt_probe_create(dtp, idp, 2, + nargs, nc, xargs, xc)) == NULL) { + dt_ident_destroy(idp); + return (NULL); + } + + dt_probe_declare(pvp, prp); + + /* + * Once our new dt_probe_t is fully constructed, iterate over the + * cached argument descriptions and assign types to prp->pr_nargv[] + * and prp->pr_xargv[] and assign mappings to prp->pr_mapping[]. + */ + for (adp = adv, i = 0; i < xc; i++, adp++) { + if (dtrace_type_strcompile(dtp, + adp->dtargd_native, &dtt) != 0) { + dt_dprintf("failed to resolve input type %s " + "for %s:%s arg #%d: %s\n", adp->dtargd_native, + pvp->pv_desc.dtvd_name, name, i + 1, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + + dtt.dtt_object = NULL; + dtt.dtt_ctfp = NULL; + dtt.dtt_type = CTF_ERR; + } else { + dt_node_type_assign(prp->pr_nargv[adp->dtargd_mapping], + dtt.dtt_ctfp, dtt.dtt_type, + dtt.dtt_flags & DTT_FL_USER ? 
B_TRUE : B_FALSE); + } + + if (dtt.dtt_type != CTF_ERR && (adp->dtargd_xlate[0] == '\0' || + strcmp(adp->dtargd_native, adp->dtargd_xlate) == 0)) { + dt_node_type_propagate(prp->pr_nargv[ + adp->dtargd_mapping], prp->pr_xargv[i]); + } else if (dtrace_type_strcompile(dtp, + adp->dtargd_xlate, &dtt) != 0) { + dt_dprintf("failed to resolve output type %s " + "for %s:%s arg #%d: %s\n", adp->dtargd_xlate, + pvp->pv_desc.dtvd_name, name, i + 1, + dtrace_errmsg(dtp, dtrace_errno(dtp))); + + dtt.dtt_object = NULL; + dtt.dtt_ctfp = NULL; + dtt.dtt_type = CTF_ERR; + } else { + dt_node_type_assign(prp->pr_xargv[i], + dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); + } + + prp->pr_mapping[i] = adp->dtargd_mapping; + prp->pr_argv[i] = dtt; + } + + return (prp); +} + +/* + * Lookup a probe declaration based on a known provider and full or partially + * specified module, function, and name. If the probe is not known to us yet, + * ask dtrace(7D) to match the description and then cache any useful results. + */ +dt_probe_t * +dt_probe_lookup(dt_provider_t *pvp, const char *s) +{ + dtrace_hdl_t *dtp = pvp->pv_hdl; + dtrace_probedesc_t pd; + dt_ident_t *idp; + size_t keylen; + char *key; + + if (dtrace_str2desc(dtp, DTRACE_PROBESPEC_NAME, s, &pd) != 0) + return (NULL); /* dt_errno is set for us */ + + keylen = dt_probe_keylen(&pd); + key = dt_probe_key(&pd, alloca(keylen)); + + /* + * If the probe is already declared, then return the dt_probe_t from + * the existing identifier. This could come from a static declaration + * or it could have been cached from an earlier call to this function. + */ + if ((idp = dt_idhash_lookup(pvp->pv_probes, key)) != NULL) + return (idp->di_data); + + /* + * If the probe isn't known, use the probe description computed above + * to ask dtrace(7D) to find the first matching probe. 
+ */ + if (dt_ioctl(dtp, DTRACEIOC_PROBEMATCH, &pd) == 0) + return (dt_probe_discover(pvp, &pd)); + + if (errno == ESRCH || errno == EBADF) + (void) dt_set_errno(dtp, EDT_NOPROBE); + else + (void) dt_set_errno(dtp, errno); + + return (NULL); +} + +dt_probe_t * +dt_probe_create(dtrace_hdl_t *dtp, dt_ident_t *idp, int protoc, + dt_node_t *nargs, uint_t nargc, dt_node_t *xargs, uint_t xargc) +{ + dt_module_t *dmp; + dt_probe_t *prp; + const char *p; + uint_t i; + + assert(idp->di_kind == DT_IDENT_PROBE); + assert(idp->di_data == NULL); + + /* + * If only a single prototype is given, set xargc/s to nargc/s to + * simplify subsequent use. Note that we can have one or both of nargs + * and xargs be specified but set to NULL, indicating a void prototype. + */ + if (protoc < 2) { + assert(xargs == NULL); + assert(xargc == 0); + xargs = nargs; + xargc = nargc; + } + + if ((prp = dt_alloc(dtp, sizeof (dt_probe_t))) == NULL) + return (NULL); + + prp->pr_pvp = NULL; + prp->pr_ident = idp; + + p = strrchr(idp->di_name, ':'); + assert(p != NULL); + prp->pr_name = p + 1; + + prp->pr_nargs = nargs; + prp->pr_nargv = dt_alloc(dtp, sizeof (dt_node_t *) * nargc); + prp->pr_nargc = nargc; + prp->pr_xargs = xargs; + prp->pr_xargv = dt_alloc(dtp, sizeof (dt_node_t *) * xargc); + prp->pr_xargc = xargc; + prp->pr_mapping = dt_alloc(dtp, sizeof (uint8_t) * xargc); + prp->pr_inst = NULL; + prp->pr_argv = dt_alloc(dtp, sizeof (dtrace_typeinfo_t) * xargc); + prp->pr_argc = xargc; + + if ((prp->pr_nargc != 0 && prp->pr_nargv == NULL) || + (prp->pr_xargc != 0 && prp->pr_xargv == NULL) || + (prp->pr_xargc != 0 && prp->pr_mapping == NULL) || + (prp->pr_argc != 0 && prp->pr_argv == NULL)) { + dt_probe_destroy(prp); + return (NULL); + } + + for (i = 0; i < xargc; i++, xargs = xargs->dn_list) { + if (xargs->dn_string != NULL) + prp->pr_mapping[i] = dt_probe_argmap(xargs, nargs); + else + prp->pr_mapping[i] = i; + + prp->pr_xargv[i] = xargs; + + if ((dmp = dt_module_lookup_by_ctf(dtp, + 
xargs->dn_ctfp)) != NULL) + prp->pr_argv[i].dtt_object = dmp->dm_name; + else + prp->pr_argv[i].dtt_object = NULL; + + prp->pr_argv[i].dtt_ctfp = xargs->dn_ctfp; + prp->pr_argv[i].dtt_type = xargs->dn_type; + } + + for (i = 0; i < nargc; i++, nargs = nargs->dn_list) + prp->pr_nargv[i] = nargs; + + idp->di_data = prp; + return (prp); +} + +void +dt_probe_declare(dt_provider_t *pvp, dt_probe_t *prp) +{ + assert(prp->pr_ident->di_kind == DT_IDENT_PROBE); + assert(prp->pr_ident->di_data == prp); + assert(prp->pr_pvp == NULL); + + if (prp->pr_xargs != prp->pr_nargs) + pvp->pv_flags &= ~DT_PROVIDER_INTF; + + prp->pr_pvp = pvp; + dt_idhash_xinsert(pvp->pv_probes, prp->pr_ident); +} + +void +dt_probe_destroy(dt_probe_t *prp) +{ + dt_probe_instance_t *pip, *pip_next; + dtrace_hdl_t *dtp; + + if (prp->pr_pvp != NULL) + dtp = prp->pr_pvp->pv_hdl; + else + dtp = yypcb->pcb_hdl; + + dt_node_list_free(&prp->pr_nargs); + dt_node_list_free(&prp->pr_xargs); + + dt_free(dtp, prp->pr_nargv); + dt_free(dtp, prp->pr_xargv); + + for (pip = prp->pr_inst; pip != NULL; pip = pip_next) { + pip_next = pip->pi_next; + dt_free(dtp, pip->pi_rname); + dt_free(dtp, pip->pi_fname); + dt_free(dtp, pip->pi_offs); + dt_free(dtp, pip->pi_enoffs); + dt_free(dtp, pip); + } + + dt_free(dtp, prp->pr_mapping); + dt_free(dtp, prp->pr_argv); + dt_free(dtp, prp); +} + +int +dt_probe_define(dt_provider_t *pvp, dt_probe_t *prp, + const char *fname, const char *rname, uint32_t offset, int isenabled) +{ + dtrace_hdl_t *dtp = pvp->pv_hdl; + dt_probe_instance_t *pip; + uint32_t **offs; + uint_t *noffs, *maxoffs; + + assert(fname != NULL); + + for (pip = prp->pr_inst; pip != NULL; pip = pip->pi_next) { + if (strcmp(pip->pi_fname, fname) == 0 && + ((rname == NULL && pip->pi_rname == NULL) || + (rname != NULL && pip->pi_rname != NULL && + strcmp(pip->pi_rname, rname) == 0))) + break; + } + + if (pip == NULL) { + if ((pip = dt_zalloc(dtp, sizeof (*pip))) == NULL) + return (-1); + + if ((pip->pi_offs = dt_zalloc(dtp, 
sizeof (uint32_t))) == NULL) + goto nomem; + + if ((pip->pi_enoffs = dt_zalloc(dtp, + sizeof (uint32_t))) == NULL) + goto nomem; + + if ((pip->pi_fname = strdup(fname)) == NULL) + goto nomem; + + if (rname != NULL && (pip->pi_rname = strdup(rname)) == NULL) + goto nomem; + + pip->pi_noffs = 0; + pip->pi_maxoffs = 1; + pip->pi_nenoffs = 0; + pip->pi_maxenoffs = 1; + + pip->pi_next = prp->pr_inst; + + prp->pr_inst = pip; + } + + if (isenabled) { + offs = &pip->pi_enoffs; + noffs = &pip->pi_nenoffs; + maxoffs = &pip->pi_maxenoffs; + } else { + offs = &pip->pi_offs; + noffs = &pip->pi_noffs; + maxoffs = &pip->pi_maxoffs; + } + + if (*noffs == *maxoffs) { + uint_t new_max = *maxoffs * 2; + uint32_t *new_offs = dt_alloc(dtp, sizeof (uint32_t) * new_max); + + if (new_offs == NULL) + return (-1); + + bcopy(*offs, new_offs, sizeof (uint32_t) * *maxoffs); + + dt_free(dtp, *offs); + *maxoffs = new_max; + *offs = new_offs; + } + + dt_dprintf("defined probe %s %s:%s %s() +0x%x (%s)\n", + isenabled ? "(is-enabled)" : "", + pvp->pv_desc.dtvd_name, prp->pr_ident->di_name, fname, offset, + rname != NULL ? rname : fname); + + assert(*noffs < *maxoffs); + (*offs)[(*noffs)++] = offset; + + return (0); + +nomem: + dt_free(dtp, pip->pi_fname); + dt_free(dtp, pip->pi_enoffs); + dt_free(dtp, pip->pi_offs); + dt_free(dtp, pip); + return (dt_set_errno(dtp, EDT_NOMEM)); +} + +/* + * Lookup the dynamic translator type tag for the specified probe argument and + * assign the type to the specified node. If the type is not yet defined, add + * it to the "D" module's type container as a typedef for an unknown type. 
+ */
+dt_node_t *
+dt_probe_tag(dt_probe_t *prp, uint_t argn, dt_node_t *dnp)
+{
+	dtrace_hdl_t *dtp = prp->pr_pvp->pv_hdl;
+	dtrace_typeinfo_t dtt;
+	size_t len;
+	char *tag;
+	/* Measure the tag name first so that it can be built on the stack. */
+	len = snprintf(NULL, 0, "__dtrace_%s___%s_arg%u",
+	    prp->pr_pvp->pv_desc.dtvd_name, prp->pr_name, argn);
+
+	tag = alloca(len + 1);
+
+	(void) snprintf(tag, len + 1, "__dtrace_%s___%s_arg%u",
+	    prp->pr_pvp->pv_desc.dtvd_name, prp->pr_name, argn);
+	/* Tag not defined yet: add it as a typedef of DT_DYN_TYPE(dtp). */
+	if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_DDEFS, tag, &dtt) != 0) {
+		dtt.dtt_object = DTRACE_OBJ_DDEFS;
+		dtt.dtt_ctfp = DT_DYN_CTFP(dtp);
+		dtt.dtt_type = ctf_add_typedef(DT_DYN_CTFP(dtp),
+		    CTF_ADD_ROOT, tag, DT_DYN_TYPE(dtp));
+
+		if (dtt.dtt_type == CTF_ERR ||
+		    ctf_update(dtt.dtt_ctfp) == CTF_ERR) {
+			xyerror(D_UNKNOWN, "cannot define type %s: %s\n",
+			    tag, ctf_errmsg(ctf_errno(dtt.dtt_ctfp)));
+		}
+	}
+	/* Reinitialize the caller's node as a type node bound to the tag. */
+	bzero(dnp, sizeof (dt_node_t));
+	dnp->dn_kind = DT_NODE_TYPE;
+
+	dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, B_FALSE);
+	dt_node_attr_assign(dnp, _dtrace_defattr);
+
+	return (dnp);
+}
+/*
+ * Probe iteration callback used by dt_probe_info().  'arg' points to the
+ * caller's dtrace_probedesc_t.  While its dtpd_id is still DTRACE_IDNONE the
+ * description in 'pdp' is copied into it and 0 is returned; otherwise 1 is
+ * returned, which dtrace_probe_iter() passes straight back to its caller.
+ */
+/*ARGSUSED*/
+static int
+dt_probe_desc(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp, void *arg)
+{
+	if (((dtrace_probedesc_t *)arg)->dtpd_id == DTRACE_IDNONE) {
+		bcopy(pdp, arg, sizeof (dtrace_probedesc_t));
+		return (0);
+	}
+
+	return (1);
+}
+/*
+ * Locate the probe described by 'pdp' and fill in 'pip' with its description
+ * attributes (dtp_attr), argument attributes (dtp_arga) and argument types
+ * (dtp_argv / dtp_argc).  Returns the probe on success.  Returns NULL if no
+ * probe or provider can be found, or if several probes match and the
+ * stability rules described below forbid reporting their arguments
+ * (EDT_UNSTABLE).
+ */
+dt_probe_t *
+dt_probe_info(dtrace_hdl_t *dtp,
+    const dtrace_probedesc_t *pdp, dtrace_probeinfo_t *pip)
+{
+	int m_is_glob = pdp->dtpd_mod[0] == '\0' || strisglob(pdp->dtpd_mod);
+	int f_is_glob = pdp->dtpd_func[0] == '\0' || strisglob(pdp->dtpd_func);
+	int n_is_glob = pdp->dtpd_name[0] == '\0' || strisglob(pdp->dtpd_name);
+
+	dt_probe_t *prp = NULL;
+	const dtrace_pattr_t *pap;
+	dt_provider_t *pvp;
+	dt_ident_t *idp;
+
+	/*
+	 * Attempt to lookup the probe in our existing cache for this provider.
+	 * If none is found and an explicit probe ID was specified, discover
+	 * that specific probe and cache its description and arguments.
+	 */
+	if ((pvp = dt_provider_lookup(dtp, pdp->dtpd_provider)) != NULL) {
+		size_t keylen = dt_probe_keylen(pdp);
+		char *key = dt_probe_key(pdp, alloca(keylen));
+
+		if ((idp = dt_idhash_lookup(pvp->pv_probes, key)) != NULL)
+			prp = idp->di_data;
+		else if (pdp->dtpd_id != DTRACE_IDNONE)
+			prp = dt_probe_discover(pvp, pdp);
+	}
+
+	/*
+	 * If no probe was found in our cache, convert the caller's partial
+	 * probe description into a fully-formed matching probe description by
+	 * iterating over up to at most two probes that match 'pdp'. We then
+	 * call dt_probe_discover() on the resulting probe identifier.
+	 */
+	if (prp == NULL) {
+		dtrace_probedesc_t pd;
+		int m;
+
+		bzero(&pd, sizeof (pd));
+		pd.dtpd_id = DTRACE_IDNONE;
+
+		/*
+		 * Call dtrace_probe_iter() to find matching probes. Our
+		 * dt_probe_desc() callback will produce the following results:
+		 *
+		 * m < 0 dtrace_probe_iter() found zero matches (or failed).
+		 * m > 0 dtrace_probe_iter() found more than one match.
+		 * m = 0 dtrace_probe_iter() found exactly one match.
+		 */
+		if ((m = dtrace_probe_iter(dtp, pdp, dt_probe_desc, &pd)) < 0)
+			return (NULL); /* dt_errno is set for us */
+
+		if ((pvp = dt_provider_lookup(dtp, pd.dtpd_provider)) == NULL)
+			return (NULL); /* dt_errno is set for us */
+
+		/*
+		 * If more than one probe was matched, then do not report probe
+		 * information if either of the following conditions is true:
+		 *
+		 * (a) The Arguments Data stability of the matched provider is
+		 * less than Evolving.
+		 *
+		 * (b) Any description component that is at least Evolving is
+		 * empty or is specified using a globbing expression.
+		 *
+		 * These conditions imply that providers that provide Evolving
+		 * or better Arguments Data stability must guarantee that all
+		 * probes with identical field names in a field of Evolving or
+		 * better Name stability have identical argument signatures.
+		 */
+		if (m > 0) {
+			if (pvp->pv_desc.dtvd_attr.dtpa_args.dtat_data <
+			    DTRACE_STABILITY_EVOLVING) {
+				(void) dt_set_errno(dtp, EDT_UNSTABLE);
+				return (NULL);
+			}
+
+
+			if (pvp->pv_desc.dtvd_attr.dtpa_mod.dtat_name >=
+			    DTRACE_STABILITY_EVOLVING && m_is_glob) {
+				(void) dt_set_errno(dtp, EDT_UNSTABLE);
+				return (NULL);
+			}
+
+			if (pvp->pv_desc.dtvd_attr.dtpa_func.dtat_name >=
+			    DTRACE_STABILITY_EVOLVING && f_is_glob) {
+				(void) dt_set_errno(dtp, EDT_UNSTABLE);
+				return (NULL);
+			}
+
+			if (pvp->pv_desc.dtvd_attr.dtpa_name.dtat_name >=
+			    DTRACE_STABILITY_EVOLVING && n_is_glob) {
+				(void) dt_set_errno(dtp, EDT_UNSTABLE);
+				return (NULL);
+			}
+		}
+
+		/*
+		 * If we matched a probe exported by dtrace(7D), then discover
+		 * the real attributes. Otherwise grab the static declaration.
+		 */
+		if (pd.dtpd_id != DTRACE_IDNONE)
+			prp = dt_probe_discover(pvp, &pd);
+		else
+			prp = dt_probe_lookup(pvp, pd.dtpd_name);
+
+		if (prp == NULL)
+			return (NULL); /* dt_errno is set for us */
+	}
+
+	assert(pvp != NULL && prp != NULL);
+
+	/*
+	 * Compute the probe description attributes by taking the minimum of
+	 * the attributes of the specified fields. If no provider is specified
+	 * or a glob pattern is used for the provider, use Unstable attributes.
+	 */
+	if (pdp->dtpd_provider[0] == '\0' || strisglob(pdp->dtpd_provider))
+		pap = &_dtrace_prvdesc;
+	else
+		pap = &pvp->pv_desc.dtvd_attr;
+
+	pip->dtp_attr = pap->dtpa_provider;
+
+	if (!m_is_glob)
+		pip->dtp_attr = dt_attr_min(pip->dtp_attr, pap->dtpa_mod);
+	if (!f_is_glob)
+		pip->dtp_attr = dt_attr_min(pip->dtp_attr, pap->dtpa_func);
+	if (!n_is_glob)
+		pip->dtp_attr = dt_attr_min(pip->dtp_attr, pap->dtpa_name);
+
+	pip->dtp_arga = pap->dtpa_args;
+	pip->dtp_argv = prp->pr_argv;
+	pip->dtp_argc = prp->pr_argc;
+
+	return (prp);
+}
+/*
+ * Thin wrapper around dt_probe_info() that reports only success or failure:
+ * returns 0 if a probe was found and 'pip' was filled in, and -1 otherwise.
+ */
+int
+dtrace_probe_info(dtrace_hdl_t *dtp,
+    const dtrace_probedesc_t *pdp, dtrace_probeinfo_t *pip)
+{
+	return (dt_probe_info(dtp, pdp, pip) != NULL ? 0 : -1);
+}
+/*
+ * dt_idhash_iter() callback installed by dtrace_probe_iter().  Probes whose
+ * name does not match pit_pat are skipped by returning 0.  For a match, the
+ * probe name and identifier are stored in pit_desc, pit_matches is bumped,
+ * and the caller's function is invoked; its return value is returned as-is.
+ */
+/*ARGSUSED*/
+static int
+dt_probe_iter(dt_idhash_t *ihp, dt_ident_t *idp, dt_probe_iter_t *pit)
+{
+	const dt_probe_t *prp = idp->di_data;
+
+	if (!dt_gmatch(prp->pr_name, pit->pit_pat))
+		return (0); /* continue on and examine next probe in hash */
+
+	(void) strlcpy(pit->pit_desc.dtpd_name, prp->pr_name, DTRACE_NAMELEN);
+	pit->pit_desc.dtpd_id = idp->di_id;
+	pit->pit_matches++;
+
+	return (pit->pit_func(pit->pit_hdl, &pit->pit_desc, pit->pit_arg));
+}
+/*
+ * Call 'func' with 'arg' for each probe selected by 'pdp'; 'pdp' may be NULL,
+ * in which case no provider or name pattern is applied.  The providers on
+ * dtp->dt_provlist that are not flagged DT_PROVIDER_IMPL are walked first,
+ * and then probes are requested one at a time through dt_ioctl().  A non-zero
+ * return from 'func' stops the iteration and is returned immediately.  When
+ * dt_ioctl() finally fails, errno selects the result: ESRCH or EBADF yield 0
+ * if at least one probe matched and EDT_NOPROBE otherwise, EINVAL yields
+ * EDT_BADPGLOB, and any other value is reported through dt_set_errno().
+ */
+int
+dtrace_probe_iter(dtrace_hdl_t *dtp,
+    const dtrace_probedesc_t *pdp, dtrace_probe_f *func, void *arg)
+{
+	const char *provider = pdp ? pdp->dtpd_provider : NULL;
+	dtrace_id_t id = DTRACE_IDNONE;
+
+	dtrace_probedesc_t pd;
+	dt_probe_iter_t pit;
+	int cmd, rv;
+
+	bzero(&pit, sizeof (pit));
+	pit.pit_hdl = dtp;
+	pit.pit_func = func;
+	pit.pit_arg = arg;
+	pit.pit_pat = pdp ? pdp->dtpd_name : NULL;
+
+	for (pit.pit_pvp = dt_list_next(&dtp->dt_provlist);
+	    pit.pit_pvp != NULL; pit.pit_pvp = dt_list_next(pit.pit_pvp)) {
+
+		if (pit.pit_pvp->pv_flags & DT_PROVIDER_IMPL)
+			continue; /* we'll get these later using dt_ioctl() */
+
+		if (!dt_gmatch(pit.pit_pvp->pv_desc.dtvd_name, provider))
+			continue;
+
+		(void) strlcpy(pit.pit_desc.dtpd_provider,
+		    pit.pit_pvp->pv_desc.dtvd_name, DTRACE_PROVNAMELEN);
+
+		if ((rv = dt_idhash_iter(pit.pit_pvp->pv_probes,
+		    (dt_idhash_f *)dt_probe_iter, &pit)) != 0)
+			return (rv);
+	}
+	/* Next, step through probes by ID, resuming after the last one seen. */
+	if (pdp != NULL)
+		cmd = DTRACEIOC_PROBEMATCH;
+	else
+		cmd = DTRACEIOC_PROBES;
+
+	for (;;) {
+		if (pdp != NULL)
+			bcopy(pdp, &pd, sizeof (pd));
+
+		pd.dtpd_id = id;
+
+		if (dt_ioctl(dtp, cmd, &pd) != 0)
+			break;
+		else if ((rv = func(dtp, &pd, arg)) != 0)
+			return (rv);
+
+		pit.pit_matches++;
+		id = pd.dtpd_id + 1;
+	}
+	/* dt_ioctl() failed: translate errno into the iteration result. */
+	switch (errno) {
+	case ESRCH:
+	case EBADF:
+		return (pit.pit_matches ? 0 : dt_set_errno(dtp, EDT_NOPROBE));
+	case EINVAL:
+		return (dt_set_errno(dtp, EDT_BADPGLOB));
+	default:
+		return (dt_set_errno(dtp, errno));
+	}
+}
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.h
new file mode 100644
index 0000000..2752baa
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.h
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DT_PROVIDER_H
+#define _DT_PROVIDER_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <dt_impl.h>
+#include <dt_ident.h>
+#include <dt_list.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Per-provider state.  pv_flags holds the DT_PROVIDER_* bits defined
+ * immediately after the structure.
+ */
+typedef struct dt_provider {
+	dt_list_t pv_list;		/* list forward/back pointers */
+	struct dt_provider *pv_next;	/* pointer to next provider in hash */
+	dtrace_providerdesc_t pv_desc;	/* provider name and attributes */
+	dt_idhash_t *pv_probes;		/* probe defs (if user-declared) */
+	dt_node_t *pv_nodes;		/* parse node allocation list */
+	ulong_t *pv_xrefs;		/* translator reference bitmap */
+	ulong_t pv_xrmax;		/* number of valid bits in pv_xrefs */
+	ulong_t pv_gen;			/* generation # that created me */
+	dtrace_hdl_t *pv_hdl;		/* pointer to containing dtrace_hdl */
+	uint_t pv_flags;		/* flags (see below) */
+} dt_provider_t;
+
+#define DT_PROVIDER_INTF 0x1	/* provider interface declaration */
+#define DT_PROVIDER_IMPL 0x2	/* provider implementation is loaded */
+/*
+ * State carried through a probe iteration: the caller's callback and
+ * argument, the name pattern being matched, the provider currently being
+ * visited, scratch storage for the description passed to the callback, and
+ * a running count of matches.
+ */
+typedef struct dt_probe_iter {
+	dtrace_probedesc_t pit_desc;	/* description storage */
+	dtrace_hdl_t *pit_hdl;		/* libdtrace handle */
+	dt_provider_t *pit_pvp;		/* current provider */
+	const char *pit_pat;		/* caller's name pattern (or NULL) */
+	dtrace_probe_f *pit_func;	/* caller's function */
+	void *pit_arg;			/* caller's argument */
+	uint_t pit_matches;		/* number of matches */
+} dt_probe_iter_t;
+/*
+ * One function in which a probe has been defined.  Ordinary probe-site
+ * offsets and is-enabled offsets are kept in two separate arrays, each with
+ * its own element count and allocation size.  Instances belonging to the
+ * same probe are chained through pi_next.
+ */
+typedef struct dt_probe_instance {
+	char *pi_fname;			/* function name */
+	char *pi_rname;			/* mangled relocation name */
+	uint32_t *pi_offs;		/* offsets into the function */
+	uint32_t *pi_enoffs;		/* is-enabled offsets */
+	uint_t pi_noffs;		/* number of offsets */
+	uint_t pi_maxoffs;		/* size of pi_offs allocation */
+	uint_t pi_nenoffs;		/* number of is-enabled offsets */
+	uint_t pi_maxenoffs;		/* size of pi_enoffs allocation */
+	struct dt_probe_instance *pi_next; /* next instance in the list */
+} dt_probe_instance_t;
+/*
+ * A single probe of a provider: its identifier and name, the native and
+ * translated argument lists (each with a vector and a count), the mapping
+ * between them, the list of instances where the probe is defined, and the
+ * output argument types reported to consumers.
+ */
+typedef struct dt_probe {
+	dt_provider_t *pr_pvp;		/* pointer to containing provider */
+	dt_ident_t *pr_ident;		/* pointer to probe identifier */
+	const char *pr_name;		/* pointer to name component */
+	dt_node_t *pr_nargs;		/* native argument list */
+	dt_node_t **pr_nargv;		/* native argument vector */
+	uint_t pr_nargc;		/* native argument count */
+	dt_node_t *pr_xargs;		/* translated argument list */
+	dt_node_t **pr_xargv;		/* translated argument vector */
+	uint_t pr_xargc;		/* translated argument count */
+	uint8_t *pr_mapping;		/* translated argument mapping */
+	dt_probe_instance_t *pr_inst;	/* list of functions and offsets */
+	dtrace_typeinfo_t *pr_argv;	/* output argument types */
+	int pr_argc;			/* output argument count */
+} dt_probe_t;
+
+extern dt_provider_t *dt_provider_lookup(dtrace_hdl_t *, const char *);
+extern dt_provider_t *dt_provider_create(dtrace_hdl_t *, const char *);
+extern void dt_provider_destroy(dtrace_hdl_t *, dt_provider_t *);
+extern int dt_provider_xref(dtrace_hdl_t *, dt_provider_t *, id_t);
+
+extern dt_probe_t *dt_probe_create(dtrace_hdl_t *, dt_ident_t *, int,
+    dt_node_t *, uint_t, dt_node_t *, uint_t);
+
+extern dt_probe_t *dt_probe_info(dtrace_hdl_t *,
+    const dtrace_probedesc_t *, dtrace_probeinfo_t *);
+
+extern dt_probe_t *dt_probe_lookup(dt_provider_t *, const char *);
+extern void dt_probe_declare(dt_provider_t *, dt_probe_t *);
+extern void dt_probe_destroy(dt_probe_t *);
+
+extern int dt_probe_define(dt_provider_t *, dt_probe_t *,
+    const char *, const char *, uint32_t, int);
+
+extern dt_node_t *dt_probe_tag(dt_probe_t *, uint_t, dt_node_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DT_PROVIDER_H */
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_regset.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_regset.c
new file mode 100644
index 0000000..0c747ed
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_regset.c
@@ -0,0 +1,132 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and
Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/bitmap.h> +#include <assert.h> +#include <strings.h> +#include <stdlib.h> + +#include <dt_regset.h> +#include <dt_impl.h> + +dt_regset_t * +dt_regset_create(ulong_t nregs) +{ + ulong_t n = BT_BITOUL(nregs); + dt_regset_t *drp = malloc(sizeof (dt_regset_t)); + + if (drp == NULL) + return (NULL); + + drp->dr_bitmap = malloc(sizeof (ulong_t) * n); + drp->dr_size = nregs; + + if (drp->dr_bitmap == NULL) { + dt_regset_destroy(drp); + return (NULL); + } + + bzero(drp->dr_bitmap, sizeof (ulong_t) * n); + return (drp); +} + +void +dt_regset_destroy(dt_regset_t *drp) +{ + free(drp->dr_bitmap); + free(drp); +} + +void +dt_regset_reset(dt_regset_t *drp) +{ + bzero(drp->dr_bitmap, sizeof (ulong_t) * BT_BITOUL(drp->dr_size)); +} + +void +dt_regset_assert_free(dt_regset_t *drp) +{ + int reg; + boolean_t fail = B_FALSE; + for (reg = 0; reg < drp->dr_size; reg++) { + if (BT_TEST(drp->dr_bitmap, reg) != 0) { + dt_dprintf("%%r%d was left allocated\n", reg); + fail = B_TRUE; + } + } + + /* + * We set this during dtest runs to check for 
register leaks. + */ + if (fail && getenv("DTRACE_DEBUG_REGSET") != NULL) + abort(); +} + +int +dt_regset_alloc(dt_regset_t *drp) +{ + ulong_t nbits = drp->dr_size - 1; + ulong_t maxw = nbits >> BT_ULSHIFT; + ulong_t wx; + + for (wx = 0; wx <= maxw; wx++) { + if (drp->dr_bitmap[wx] != ~0UL) + break; + } + + if (wx <= maxw) { + ulong_t maxb = (wx == maxw) ? nbits & BT_ULMASK : BT_NBIPUL - 1; + ulong_t word = drp->dr_bitmap[wx]; + ulong_t bit, bx; + int reg; + + for (bit = 1, bx = 0; bx <= maxb; bx++, bit <<= 1) { + if ((word & bit) == 0) { + reg = (int)((wx << BT_ULSHIFT) | bx); + BT_SET(drp->dr_bitmap, reg); + return (reg); + } + } + } + + xyerror(D_NOREG, "Insufficient registers to generate code"); + /*NOTREACHED*/ + return (-1); +} + +void +dt_regset_free(dt_regset_t *drp, int reg) +{ + assert(reg >= 0 && reg < drp->dr_size); + assert(BT_TEST(drp->dr_bitmap, reg) != 0); + BT_CLEAR(drp->dr_bitmap, reg); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_regset.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_regset.h new file mode 100644 index 0000000..3508284 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_regset.h @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _DT_REGSET_H
+#define _DT_REGSET_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * A set of dr_size registers tracked with one bit per register in
+ * dr_bitmap.  A set bit marks the register as allocated and a clear bit
+ * marks it as free.
+ */
+typedef struct dt_regset {
+	ulong_t dr_size;		/* number of registers in set */
+	ulong_t *dr_bitmap;		/* bitmap of active registers */
+} dt_regset_t;
+
+extern dt_regset_t *dt_regset_create(ulong_t);
+extern void dt_regset_destroy(dt_regset_t *);
+extern void dt_regset_reset(dt_regset_t *);
+extern int dt_regset_alloc(dt_regset_t *);
+extern void dt_regset_free(dt_regset_t *, int);
+extern void dt_regset_assert_free(dt_regset_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DT_REGSET_H */
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.c
new file mode 100644
index 0000000..782d66c
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.c
@@ -0,0 +1,309 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <strings.h> +#include <stdlib.h> +#include <errno.h> +#include <ctype.h> + +#include <dt_string.h> + +/* + * Transform string s inline, converting each embedded C escape sequence string + * to the corresponding character. For example, the substring "\n" is replaced + * by an inline '\n' character. The length of the resulting string is returned. + */ +size_t +stresc2chr(char *s) +{ + char *p, *q, c; + int esc = 0; + int x; + + for (p = q = s; (c = *p) != '\0'; p++) { + if (esc) { + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + c -= '0'; + p++; + + if (*p >= '0' && *p <= '7') { + c = c * 8 + *p++ - '0'; + + if (*p >= '0' && *p <= '7') + c = c * 8 + *p - '0'; + else + p--; + } else + p--; + + *q++ = c; + break; + + case 'a': + *q++ = '\a'; + break; + case 'b': + *q++ = '\b'; + break; + case 'f': + *q++ = '\f'; + break; + case 'n': + *q++ = '\n'; + break; + case 'r': + *q++ = '\r'; + break; + case 't': + *q++ = '\t'; + break; + case 'v': + *q++ = '\v'; + break; + + case 'x': + for (x = 0; (c = *++p) != '\0'; ) { + if (c >= '0' && c <= '9') + x = x * 16 + c - '0'; + else if (c >= 'a' && c <= 'f') + x = x * 16 + c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + x = x * 16 + c - 'A' + 10; + else + break; + } + *q++ = (char)x; + p--; + break; + + case '"': + case '\\': + *q++ = c; + break; + default: + *q++ = '\\'; + *q++ = c; + } + + esc = 0; + + } else { + if ((esc = c == '\\') == 0) + *q++ = c; + } + } + + *q = '\0'; + return ((size_t)(q - s)); +} + +/* + * Create a copy of string s in which certain unprintable or special characters + * have been converted to the string 
representation of their C escape sequence. + * For example, the newline character is expanded to the string "\n". + */ +char * +strchr2esc(const char *s, size_t n) +{ + const char *p; + char *q, *s2, c; + size_t addl = 0; + + for (p = s; p < s + n; p++) { + switch (c = *p) { + case '\0': + case '\a': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + case '"': + case '\\': + addl++; /* 1 add'l char needed to follow \ */ + break; + case ' ': + break; + default: + if (c < '!' || c > '~') + addl += 3; /* 3 add'l chars following \ */ + } + } + + if ((s2 = malloc(n + addl + 1)) == NULL) + return (NULL); + + for (p = s, q = s2; p < s + n; p++) { + switch (c = *p) { + case '\0': + *q++ = '\\'; + *q++ = '0'; + break; + case '\a': + *q++ = '\\'; + *q++ = 'a'; + break; + case '\b': + *q++ = '\\'; + *q++ = 'b'; + break; + case '\f': + *q++ = '\\'; + *q++ = 'f'; + break; + case '\n': + *q++ = '\\'; + *q++ = 'n'; + break; + case '\r': + *q++ = '\\'; + *q++ = 'r'; + break; + case '\t': + *q++ = '\\'; + *q++ = 't'; + break; + case '\v': + *q++ = '\\'; + *q++ = 'v'; + break; + case '"': + *q++ = '\\'; + *q++ = '"'; + break; + case '\\': + *q++ = '\\'; + *q++ = '\\'; + break; + case ' ': + *q++ = c; + break; + default: + if (c < '!' || c > '~') { + *q++ = '\\'; + *q++ = ((c >> 6) & 3) + '0'; + *q++ = ((c >> 3) & 7) + '0'; + *q++ = (c & 7) + '0'; + } else + *q++ = c; + } + + if (c == '\0') + break; /* don't continue past \0 even if p < s + n */ + } + + *q = '\0'; + return (s2); +} + +/* + * Return the basename (name after final /) of the given string. We use + * strbasename rather than basename to avoid conflicting with libgen.h's + * non-const function prototype. 
+ */ +const char * +strbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + if (p == NULL) + return (s); + + return (++p); +} + +/* + * This function tests a string against the regular expression used for idents + * and integers in the D lexer, and should match the superset of RGX_IDENT and + * RGX_INT in dt_lex.l. If an invalid character is found, the function returns + * a pointer to it. Otherwise NULL is returned for a valid string. + */ +const char * +strbadidnum(const char *s) +{ + char *p; + int c; + + if (*s == '\0') + return (s); + + errno = 0; + (void) strtoull(s, &p, 0); + + if (errno == 0 && *p == '\0') + return (NULL); /* matches RGX_INT */ + + while ((c = *s++) != '\0') { + if (isalnum(c) == 0 && c != '_' && c != '`') + return (s - 1); + } + + return (NULL); /* matches RGX_IDENT */ +} + +/* + * Determine whether the string contains a glob matching pattern or is just a + * simple string. See gmatch(3GEN) and sh(1) for the glob syntax definition. + */ +int +strisglob(const char *s) +{ + char c; + + while ((c = *s++) != '\0') { + if (c == '[' || c == '?' || c == '*' || c == '\\') + return (1); + } + + return (0); +} + +/* + * Hyphenate a string in-place by converting any instances of "__" to "-", + * which we use for probe names to improve readability, and return the string. + */ +char * +strhyphenate(char *s) +{ + char *p, *q; + + for (p = s, q = p + strlen(p); p < q; p++) { + if (p[0] == '_' && p[1] == '_') { + p[0] = '-'; + bcopy(p + 2, p + 1, (size_t)(q - p) - 1); + } + } + + return (s); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.h new file mode 100644 index 0000000..6ee626d --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.h @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _DT_STRING_H +#define _DT_STRING_H + + +#include <sys/types.h> +#include <strings.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern size_t stresc2chr(char *); +extern char *strchr2esc(const char *, size_t); +extern const char *strbasename(const char *); +extern const char *strbadidnum(const char *); +extern int strisglob(const char *); +extern char *strhyphenate(char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_STRING_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_strtab.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_strtab.c new file mode 100644 index 0000000..cf6bc48 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_strtab.c @@ -0,0 +1,293 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <strings.h> +#include <stdlib.h> +#include <assert.h> + +#include <dt_strtab.h> +#include <dt_impl.h> + +static int +dt_strtab_grow(dt_strtab_t *sp) +{ + char *ptr, **bufs; + + if ((ptr = malloc(sp->str_bufsz)) == NULL) + return (-1); + + bufs = realloc(sp->str_bufs, (sp->str_nbufs + 1) * sizeof (char *)); + + if (bufs == NULL) { + free(ptr); + return (-1); + } + + sp->str_nbufs++; + sp->str_bufs = bufs; + sp->str_ptr = ptr; + sp->str_bufs[sp->str_nbufs - 1] = sp->str_ptr; + + return (0); +} + +dt_strtab_t * +dt_strtab_create(size_t bufsz) +{ + dt_strtab_t *sp = malloc(sizeof (dt_strtab_t)); + uint_t nbuckets = _dtrace_strbuckets; + + assert(bufsz != 0); + + if (sp == NULL) + return (NULL); + + bzero(sp, sizeof (dt_strtab_t)); + sp->str_hash = malloc(nbuckets * sizeof (dt_strhash_t *)); + + if (sp->str_hash == NULL) + goto err; + + bzero(sp->str_hash, nbuckets * sizeof (dt_strhash_t *)); + sp->str_hashsz = nbuckets; + sp->str_bufs = NULL; + sp->str_ptr = NULL; + sp->str_nbufs = 0; + sp->str_bufsz = bufsz; + sp->str_nstrs = 1; + sp->str_size = 1; + + if (dt_strtab_grow(sp) == -1) + goto err; + + *sp->str_ptr++ = '\0'; + return (sp); + +err: + dt_strtab_destroy(sp); + return (NULL); +} + +void +dt_strtab_destroy(dt_strtab_t *sp) +{ + dt_strhash_t *hp, *hq; + ulong_t i; + + for (i = 0; i < sp->str_hashsz; i++) { + for (hp = sp->str_hash[i]; hp != NULL; hp = hq) { + hq = hp->str_next; 
+ free(hp); + } + } + + for (i = 0; i < sp->str_nbufs; i++) + free(sp->str_bufs[i]); + + if (sp->str_hash != NULL) + free(sp->str_hash); + if (sp->str_bufs != NULL) + free(sp->str_bufs); + + free(sp); +} + +ulong_t +dt_strtab_hash(const char *key, size_t *len) +{ + ulong_t g, h = 0; + const char *p; + size_t n = 0; + + for (p = key; *p != '\0'; p++, n++) { + h = (h << 4) + *p; + + if ((g = (h & 0xf0000000)) != 0) { + h ^= (g >> 24); + h ^= g; + } + } + + if (len != NULL) + *len = n; + + return (h); +} + +static int +dt_strtab_compare(dt_strtab_t *sp, dt_strhash_t *hp, + const char *str, size_t len) +{ + ulong_t b = hp->str_buf; + const char *buf = hp->str_data; + size_t resid, n; + int rv; + + while (len != 0) { + if (buf == sp->str_bufs[b] + sp->str_bufsz) + buf = sp->str_bufs[++b]; + + resid = sp->str_bufs[b] + sp->str_bufsz - buf; + n = MIN(resid, len); + + if ((rv = strncmp(buf, str, n)) != 0) + return (rv); + + buf += n; + str += n; + len -= n; + } + + return (0); +} + +static int +dt_strtab_copyin(dt_strtab_t *sp, const char *str, size_t len) +{ + char *old_p = sp->str_ptr; + ulong_t old_n = sp->str_nbufs; + + ulong_t b = sp->str_nbufs - 1; + size_t resid, n; + + while (len != 0) { + if (sp->str_ptr == sp->str_bufs[b] + sp->str_bufsz) { + if (dt_strtab_grow(sp) == -1) + goto err; + b++; + } + + resid = sp->str_bufs[b] + sp->str_bufsz - sp->str_ptr; + n = MIN(resid, len); + bcopy(str, sp->str_ptr, n); + + sp->str_ptr += n; + str += n; + len -= n; + } + + return (0); + +err: + while (sp->str_nbufs != old_n) + free(sp->str_bufs[--sp->str_nbufs]); + + sp->str_ptr = old_p; + return (-1); +} + +ssize_t +dt_strtab_index(dt_strtab_t *sp, const char *str) +{ + dt_strhash_t *hp; + size_t len; + ulong_t h; + + if (str == NULL || str[0] == '\0') + return (0); /* we keep a \0 at offset 0 to simplify things */ + + h = dt_strtab_hash(str, &len) % sp->str_hashsz; + + for (hp = sp->str_hash[h]; hp != NULL; hp = hp->str_next) { + if (dt_strtab_compare(sp, hp, str, len + 1) == 
0) + return (hp->str_off); + } + + return (-1); +} + +ssize_t +dt_strtab_insert(dt_strtab_t *sp, const char *str) +{ + dt_strhash_t *hp; + size_t len; + ssize_t off; + ulong_t h; + + if ((off = dt_strtab_index(sp, str)) != -1) + return (off); + + h = dt_strtab_hash(str, &len) % sp->str_hashsz; + + /* + * Create a new hash bucket, initialize it, and insert it at the front + * of the hash chain for the appropriate bucket. + */ + if ((hp = malloc(sizeof (dt_strhash_t))) == NULL) + return (-1L); + + hp->str_data = sp->str_ptr; + hp->str_buf = sp->str_nbufs - 1; + hp->str_off = sp->str_size; + hp->str_len = len; + hp->str_next = sp->str_hash[h]; + + /* + * Now copy the string data into our buffer list, and then update + * the global counts of strings and bytes. Return str's byte offset. + */ + if (dt_strtab_copyin(sp, str, len + 1) == -1) + return (-1L); + + sp->str_nstrs++; + sp->str_size += len + 1; + sp->str_hash[h] = hp; + + return (hp->str_off); +} + +size_t +dt_strtab_size(const dt_strtab_t *sp) +{ + return (sp->str_size); +} + +ssize_t +dt_strtab_write(const dt_strtab_t *sp, dt_strtab_write_f *func, void *private) +{ + ssize_t res, total = 0; + ulong_t i; + size_t n; + + for (i = 0; i < sp->str_nbufs; i++, total += res) { + if (i == sp->str_nbufs - 1) + n = sp->str_ptr - sp->str_bufs[i]; + else + n = sp->str_bufsz; + + if ((res = func(sp->str_bufs[i], n, total, private)) <= 0) + break; + } + + if (total == 0 && sp->str_size != 0) + return (-1); + + return (total); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_strtab.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_strtab.h new file mode 100644 index 0000000..551dabb --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_strtab.h @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DT_STRTAB_H
+#define _DT_STRTAB_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Hash-chain node describing one string stored in a dt_strtab_t: where its
+ * bytes begin (pointer and buffer index), its offset within the table as a
+ * whole, and its length.
+ */
+typedef struct dt_strhash {
+	const char *str_data;		/* pointer to actual string data */
+	ulong_t str_buf;		/* index of string data buffer */
+	size_t str_off;			/* offset in bytes of this string */
+	size_t str_len;			/* length in bytes of this string */
+	struct dt_strhash *str_next;	/* next string in hash chain */
+} dt_strhash_t;
+/*
+ * String table.  String data is packed into an array of buffers of
+ * str_bufsz bytes each, and each stored string is indexed through the
+ * str_hash buckets.
+ */
+typedef struct dt_strtab {
+	dt_strhash_t **str_hash;	/* array of hash buckets */
+	ulong_t str_hashsz;		/* size of hash bucket array */
+	char **str_bufs;		/* array of buffer pointers */
+	char *str_ptr;			/* pointer to current buffer location */
+	ulong_t str_nbufs;		/* size of buffer pointer array */
+	size_t str_bufsz;		/* size of individual buffer */
+	ulong_t str_nstrs;		/* total number of strings in strtab */
+	size_t str_size;		/* total size of strings in bytes */
+} dt_strtab_t;
+/*
+ * Output callback type taken by dt_strtab_write().  The parameters are, in
+ * order: a pointer to string data, a size_t, a second size_t, and the
+ * caller's private pointer.
+ */
+typedef ssize_t dt_strtab_write_f(const char *, size_t, size_t, void *);
+
+extern dt_strtab_t *dt_strtab_create(size_t);
+extern void dt_strtab_destroy(dt_strtab_t *);
+extern ssize_t dt_strtab_index(dt_strtab_t *, const char *);
+extern ssize_t dt_strtab_insert(dt_strtab_t *, const char *);
+extern size_t dt_strtab_size(const dt_strtab_t *);
+extern ssize_t dt_strtab_write(const dt_strtab_t *,
+    dt_strtab_write_f *, void *);
+extern ulong_t dt_strtab_hash(const char *, size_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DT_STRTAB_H */
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_subr.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_subr.c
new file mode 100644
index 0000000..4429019
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_subr.c
@@ -0,0 +1,984 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#if defined(sun) +#include <sys/sysmacros.h> +#endif +#include <sys/isa_defs.h> + +#include <strings.h> +#include <unistd.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <ctype.h> +#if defined(sun) +#include <alloca.h> +#else +#include <sys/sysctl.h> +#include <libproc_compat.h> +#endif +#include <assert.h> +#include <libgen.h> +#include <limits.h> +#include <stdint.h> + +#include <dt_impl.h> + +static const struct { + size_t dtps_offset; + size_t dtps_len; +} dtrace_probespecs[] = { + { offsetof(dtrace_probedesc_t, dtpd_provider), DTRACE_PROVNAMELEN }, + { offsetof(dtrace_probedesc_t, dtpd_mod), DTRACE_MODNAMELEN }, + { offsetof(dtrace_probedesc_t, dtpd_func), DTRACE_FUNCNAMELEN }, + { offsetof(dtrace_probedesc_t, dtpd_name), DTRACE_NAMELEN } +}; + +int +dtrace_xstr2desc(dtrace_hdl_t *dtp, dtrace_probespec_t spec, + const char *s, int argc, char *const argv[], dtrace_probedesc_t *pdp) +{ + size_t off, len, vlen, wlen; + const char *p, *q, *v, *w; + + char buf[32]; /* for id_t as %d (see below) */ + + if (spec < DTRACE_PROBESPEC_NONE || spec > DTRACE_PROBESPEC_NAME) + return (dt_set_errno(dtp, EINVAL)); + + bzero(pdp, sizeof (dtrace_probedesc_t)); + p = s + strlen(s) - 1; + + do { + for (len = 0; p >= s && *p != ':'; len++) + p--; /* move backward until we find a delimiter */ + + q = p + 1; + vlen = 0; + w = NULL; + wlen = 0; + + if ((v = strchr(q, '$')) != NULL && v < q + len) { + /* + * Set vlen to the length of the variable name and then + * reset len to the length of the text prior to '$'. If + * the name begins with a digit, interpret it using the + * the argv[] array. Otherwise we look in dt_macros. + * For the moment, all dt_macros variables are of type + * id_t (see dtrace_update() for more details on that). 
+ */ + vlen = (size_t)(q + len - v); + len = (size_t)(v - q); + + /* + * If the variable string begins with $$, skip past the + * leading dollar sign since $ and $$ are equivalent + * macro reference operators in a probe description. + */ + if (vlen > 2 && v[1] == '$') { + vlen--; + v++; + } + + if (isdigit(v[1])) { + long i; + + errno = 0; + i = strtol(v + 1, (char **)&w, 10); + + wlen = vlen - (w - v); + + if (i < 0 || i >= argc || errno != 0) + return (dt_set_errno(dtp, EDT_BADSPCV)); + + v = argv[i]; + vlen = strlen(v); + + if (yypcb != NULL && yypcb->pcb_sargv == argv) + yypcb->pcb_sflagv[i] |= DT_IDFLG_REF; + + } else if (vlen > 1) { + char *vstr = alloca(vlen); + dt_ident_t *idp; + + (void) strncpy(vstr, v + 1, vlen - 1); + vstr[vlen - 1] = '\0'; + idp = dt_idhash_lookup(dtp->dt_macros, vstr); + + if (idp == NULL) + return (dt_set_errno(dtp, EDT_BADSPCV)); + + v = buf; + vlen = snprintf(buf, 32, "%d", idp->di_id); + + } else + return (dt_set_errno(dtp, EDT_BADSPCV)); + } + + if (spec == DTRACE_PROBESPEC_NONE) + return (dt_set_errno(dtp, EDT_BADSPEC)); + + if (len + vlen >= dtrace_probespecs[spec].dtps_len) + return (dt_set_errno(dtp, ENAMETOOLONG)); + + off = dtrace_probespecs[spec--].dtps_offset; + bcopy(q, (char *)pdp + off, len); + bcopy(v, (char *)pdp + off + len, vlen); + bcopy(w, (char *)pdp + off + len + vlen, wlen); + } while (--p >= s); + + pdp->dtpd_id = DTRACE_IDNONE; + return (0); +} + +int +dtrace_str2desc(dtrace_hdl_t *dtp, dtrace_probespec_t spec, + const char *s, dtrace_probedesc_t *pdp) +{ + return (dtrace_xstr2desc(dtp, spec, s, 0, NULL, pdp)); +} + +int +dtrace_id2desc(dtrace_hdl_t *dtp, dtrace_id_t id, dtrace_probedesc_t *pdp) +{ + bzero(pdp, sizeof (dtrace_probedesc_t)); + pdp->dtpd_id = id; + + if (dt_ioctl(dtp, DTRACEIOC_PROBES, pdp) == -1 || + pdp->dtpd_id != id) + return (dt_set_errno(dtp, EDT_BADID)); + + return (0); +} + +char * +dtrace_desc2str(const dtrace_probedesc_t *pdp, char *buf, size_t len) +{ + if (pdp->dtpd_id == 0) { + 
/*
 * Split the next '/'-delimited token off the front of the writable string 'p'.
 * If 'p' is empty, return NULL and leave '*qp' untouched.  Otherwise the
 * first '/' (if any) is overwritten with a NUL, '*qp' is set to the text
 * following it (or to the terminating NUL of 'p' when there is no '/'),
 * and 'p' itself is returned as the token.
 */
static char *
dt_getstrattr(char *p, char **qp)
{
	char *sep;

	if (p[0] == '\0')
		return (NULL);

	sep = strchr(p, '/');

	if (sep != NULL) {
		*sep = '\0';		/* terminate the current token */
		*qp = sep + 1;
	} else {
		*qp = p + strlen(p);	/* no more tokens after this one */
	}

	return (p);
}
+dtrace_stability_name(dtrace_stability_t s) +{ + switch (s) { + case DTRACE_STABILITY_INTERNAL: return ("Internal"); + case DTRACE_STABILITY_PRIVATE: return ("Private"); + case DTRACE_STABILITY_OBSOLETE: return ("Obsolete"); + case DTRACE_STABILITY_EXTERNAL: return ("External"); + case DTRACE_STABILITY_UNSTABLE: return ("Unstable"); + case DTRACE_STABILITY_EVOLVING: return ("Evolving"); + case DTRACE_STABILITY_STABLE: return ("Stable"); + case DTRACE_STABILITY_STANDARD: return ("Standard"); + default: return (NULL); + } +} + +const char * +dtrace_class_name(dtrace_class_t c) +{ + switch (c) { + case DTRACE_CLASS_UNKNOWN: return ("Unknown"); + case DTRACE_CLASS_CPU: return ("CPU"); + case DTRACE_CLASS_PLATFORM: return ("Platform"); + case DTRACE_CLASS_GROUP: return ("Group"); + case DTRACE_CLASS_ISA: return ("ISA"); + case DTRACE_CLASS_COMMON: return ("Common"); + default: return (NULL); + } +} + +dtrace_attribute_t +dt_attr_min(dtrace_attribute_t a1, dtrace_attribute_t a2) +{ + dtrace_attribute_t am; + + am.dtat_name = MIN(a1.dtat_name, a2.dtat_name); + am.dtat_data = MIN(a1.dtat_data, a2.dtat_data); + am.dtat_class = MIN(a1.dtat_class, a2.dtat_class); + + return (am); +} + +dtrace_attribute_t +dt_attr_max(dtrace_attribute_t a1, dtrace_attribute_t a2) +{ + dtrace_attribute_t am; + + am.dtat_name = MAX(a1.dtat_name, a2.dtat_name); + am.dtat_data = MAX(a1.dtat_data, a2.dtat_data); + am.dtat_class = MAX(a1.dtat_class, a2.dtat_class); + + return (am); +} + +/* + * Compare two attributes and return an integer value in the following ranges: + * + * <0 if any of a1's attributes are less than a2's attributes + * =0 if all of a1's attributes are equal to a2's attributes + * >0 if all of a1's attributes are greater than or equal to a2's attributes + * + * To implement this function efficiently, we subtract a2's attributes from + * a1's to obtain a negative result if an a1 attribute is less than its a2 + * counterpart. 
We then OR the intermediate results together, relying on the + * twos-complement property that if any result is negative, the bitwise union + * will also be negative since the highest bit will be set in the result. + */ +int +dt_attr_cmp(dtrace_attribute_t a1, dtrace_attribute_t a2) +{ + return (((int)a1.dtat_name - a2.dtat_name) | + ((int)a1.dtat_data - a2.dtat_data) | + ((int)a1.dtat_class - a2.dtat_class)); +} + +char * +dt_attr_str(dtrace_attribute_t a, char *buf, size_t len) +{ + static const char stability[] = "ipoxuesS"; + static const char class[] = "uCpgIc"; + + if (a.dtat_name < sizeof (stability) && + a.dtat_data < sizeof (stability) && a.dtat_class < sizeof (class)) { + (void) snprintf(buf, len, "[%c/%c/%c]", stability[a.dtat_name], + stability[a.dtat_data], class[a.dtat_class]); + } else { + (void) snprintf(buf, len, "[%u/%u/%u]", + a.dtat_name, a.dtat_data, a.dtat_class); + } + + return (buf); +} + +char * +dt_version_num2str(dt_version_t v, char *buf, size_t len) +{ + uint_t M = DT_VERSION_MAJOR(v); + uint_t m = DT_VERSION_MINOR(v); + uint_t u = DT_VERSION_MICRO(v); + + if (u == 0) + (void) snprintf(buf, len, "%u.%u", M, m); + else + (void) snprintf(buf, len, "%u.%u.%u", M, m, u); + + return (buf); +} + +int +dt_version_str2num(const char *s, dt_version_t *vp) +{ + int i = 0, n[3] = { 0, 0, 0 }; + char c; + + while ((c = *s++) != '\0') { + if (isdigit(c)) + n[i] = n[i] * 10 + c - '0'; + else if (c != '.' 
|| i++ >= sizeof (n) / sizeof (n[0]) - 1) + return (-1); + } + + if (n[0] > DT_VERSION_MAJMAX || + n[1] > DT_VERSION_MINMAX || + n[2] > DT_VERSION_MICMAX) + return (-1); + + if (vp != NULL) + *vp = DT_VERSION_NUMBER(n[0], n[1], n[2]); + + return (0); +} + +int +dt_version_defined(dt_version_t v) +{ + int i; + + for (i = 0; _dtrace_versions[i] != 0; i++) { + if (_dtrace_versions[i] == v) + return (1); + } + + return (0); +} + +char * +dt_cpp_add_arg(dtrace_hdl_t *dtp, const char *str) +{ + char *arg; + + if (dtp->dt_cpp_argc == dtp->dt_cpp_args) { + int olds = dtp->dt_cpp_args; + int news = olds * 2; + char **argv = realloc(dtp->dt_cpp_argv, sizeof (char *) * news); + + if (argv == NULL) + return (NULL); + + bzero(&argv[olds], sizeof (char *) * olds); + dtp->dt_cpp_argv = argv; + dtp->dt_cpp_args = news; + } + + if ((arg = strdup(str)) == NULL) + return (NULL); + + assert(dtp->dt_cpp_argc < dtp->dt_cpp_args); + dtp->dt_cpp_argv[dtp->dt_cpp_argc++] = arg; + return (arg); +} + +char * +dt_cpp_pop_arg(dtrace_hdl_t *dtp) +{ + char *arg; + + if (dtp->dt_cpp_argc <= 1) + return (NULL); /* dt_cpp_argv[0] cannot be popped */ + + arg = dtp->dt_cpp_argv[--dtp->dt_cpp_argc]; + dtp->dt_cpp_argv[dtp->dt_cpp_argc] = NULL; + + return (arg); +} + +/*PRINTFLIKE1*/ +void +dt_dprintf(const char *format, ...) +{ + if (_dtrace_debug) { + va_list alist; + + va_start(alist, format); + (void) fputs("libdtrace DEBUG: ", stderr); + (void) vfprintf(stderr, format, alist); + va_end(alist); + } +} + +int +#if defined(sun) +dt_ioctl(dtrace_hdl_t *dtp, int val, void *arg) +#else +dt_ioctl(dtrace_hdl_t *dtp, u_long val, void *arg) +#endif +{ + const dtrace_vector_t *v = dtp->dt_vector; + +#if !defined(sun) + /* Avoid sign extension. 
*/ + val &= 0xffffffff; +#endif + + if (v != NULL) + return (v->dtv_ioctl(dtp->dt_varg, val, arg)); + + if (dtp->dt_fd >= 0) + return (ioctl(dtp->dt_fd, val, arg)); + + errno = EBADF; + return (-1); +} + +int +dt_status(dtrace_hdl_t *dtp, processorid_t cpu) +{ + const dtrace_vector_t *v = dtp->dt_vector; + + if (v == NULL) { +#if defined(sun) + return (p_online(cpu, P_STATUS)); +#else + int maxid = 0; + size_t len = sizeof(maxid); + if (sysctlbyname("kern.smp.maxid", &maxid, &len, NULL, 0) != 0) + return (cpu == 0 ? 1 : -1); + else + return (cpu <= maxid ? 1 : -1); +#endif + } + + return (v->dtv_status(dtp->dt_varg, cpu)); +} + +long +dt_sysconf(dtrace_hdl_t *dtp, int name) +{ + const dtrace_vector_t *v = dtp->dt_vector; + + if (v == NULL) + return (sysconf(name)); + + return (v->dtv_sysconf(dtp->dt_varg, name)); +} + +/* + * Wrapper around write(2) to handle partial writes. For maximum safety of + * output files and proper error reporting, we continuing writing in the + * face of partial writes until write(2) fails or 'buf' is completely written. + * We also record any errno in the specified dtrace_hdl_t as well as 'errno'. + */ +ssize_t +dt_write(dtrace_hdl_t *dtp, int fd, const void *buf, size_t n) +{ + ssize_t resid = n; + ssize_t len; + + while (resid != 0) { + if ((len = write(fd, buf, resid)) <= 0) + break; + + resid -= len; + buf = (char *)buf + len; + } + + if (resid == n && n != 0) + return (dt_set_errno(dtp, errno)); + + return (n - resid); +} + +/* + * This function handles all output from libdtrace, as well as the + * dtrace_sprintf() case. If we're here due to dtrace_sprintf(), then + * dt_sprintf_buflen will be non-zero; in this case, we sprintf into the + * specified buffer and return. Otherwise, if output is buffered (denoted by + * a NULL fp), we sprintf the desired output into the buffered buffer + * (expanding the buffer if required). 
If we don't satisfy either of these + * conditions (that is, if we are to actually generate output), then we call + * fprintf with the specified fp. In this case, we need to deal with one of + * the more annoying peculiarities of libc's printf routines: any failed + * write persistently sets an error flag inside the FILE causing every + * subsequent write to fail, but only the caller that initiated the error gets + * the errno. Since libdtrace clients often intercept SIGINT, this case is + * particularly frustrating since we don't want the EINTR on one attempt to + * write to the output file to preclude later attempts to write. This + * function therefore does a clearerr() if any error occurred, and saves the + * errno for the caller inside the specified dtrace_hdl_t. + */ +/*PRINTFLIKE3*/ +int +dt_printf(dtrace_hdl_t *dtp, FILE *fp, const char *format, ...) +{ + va_list ap; + int n; + +#if !defined(sun) + /* + * On FreeBSD, check if output is currently being re-directed + * to another file. If so, output to that file instead of the + * one the caller has specified. + */ + if (dtp->dt_freopen_fp != NULL) + fp = dtp->dt_freopen_fp; +#endif + + va_start(ap, format); + + if (dtp->dt_sprintf_buflen != 0) { + int len; + char *buf; + + assert(dtp->dt_sprintf_buf != NULL); + + buf = &dtp->dt_sprintf_buf[len = strlen(dtp->dt_sprintf_buf)]; + len = dtp->dt_sprintf_buflen - len; + assert(len >= 0); + + if ((n = vsnprintf(buf, len, format, ap)) < 0) + n = dt_set_errno(dtp, errno); + + va_end(ap); + + return (n); + } + + if (fp == NULL) { + int needed, rval; + size_t avail; + + /* + * Using buffered output is not allowed if a handler has + * not been installed. 
+ */ + if (dtp->dt_bufhdlr == NULL) { + va_end(ap); + return (dt_set_errno(dtp, EDT_NOBUFFERED)); + } + + if (dtp->dt_buffered_buf == NULL) { + assert(dtp->dt_buffered_size == 0); + dtp->dt_buffered_size = 1; + dtp->dt_buffered_buf = malloc(dtp->dt_buffered_size); + + if (dtp->dt_buffered_buf == NULL) { + va_end(ap); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + dtp->dt_buffered_offs = 0; + dtp->dt_buffered_buf[0] = '\0'; + } + + if ((needed = vsnprintf(NULL, 0, format, ap)) < 0) { + rval = dt_set_errno(dtp, errno); + va_end(ap); + return (rval); + } + + if (needed == 0) { + va_end(ap); + return (0); + } + + for (;;) { + char *newbuf; + + assert(dtp->dt_buffered_offs < dtp->dt_buffered_size); + avail = dtp->dt_buffered_size - dtp->dt_buffered_offs; + + if (needed + 1 < avail) + break; + + if ((newbuf = realloc(dtp->dt_buffered_buf, + dtp->dt_buffered_size << 1)) == NULL) { + va_end(ap); + return (dt_set_errno(dtp, EDT_NOMEM)); + } + + dtp->dt_buffered_buf = newbuf; + dtp->dt_buffered_size <<= 1; + } + + if (vsnprintf(&dtp->dt_buffered_buf[dtp->dt_buffered_offs], + avail, format, ap) < 0) { + rval = dt_set_errno(dtp, errno); + va_end(ap); + return (rval); + } + + dtp->dt_buffered_offs += needed; + assert(dtp->dt_buffered_buf[dtp->dt_buffered_offs] == '\0'); + va_end(ap); + return (0); + } + + n = vfprintf(fp, format, ap); + fflush(fp); + va_end(ap); + + if (n < 0) { + clearerr(fp); + return (dt_set_errno(dtp, errno)); + } + + return (n); +} + +int +dt_buffered_flush(dtrace_hdl_t *dtp, dtrace_probedata_t *pdata, + const dtrace_recdesc_t *rec, const dtrace_aggdata_t *agg, uint32_t flags) +{ + dtrace_bufdata_t data; + + if (dtp->dt_buffered_offs == 0) + return (0); + + data.dtbda_handle = dtp; + data.dtbda_buffered = dtp->dt_buffered_buf; + data.dtbda_probe = pdata; + data.dtbda_recdesc = rec; + data.dtbda_aggdata = agg; + data.dtbda_flags = flags; + + if ((*dtp->dt_bufhdlr)(&data, dtp->dt_bufarg) == DTRACE_HANDLE_ABORT) + return (dt_set_errno(dtp, EDT_DIRABORT)); + 
/*
 * Return a pointer to the portion of 'str' that follows its last '/'.  If
 * 'str' contains no '/', 'str' itself is returned.  The string is not
 * modified and no memory is allocated.
 */
char *
dt_basename(char *str)
{
	char *slash = strrchr(str, '/');

	return (slash != NULL ? slash + 1 : str);
}
+ */ +ulong_t +dt_popc(ulong_t x) +{ +#if defined(_ILP32) + x = x - ((x >> 1) & 0x55555555UL); + x = (x & 0x33333333UL) + ((x >> 2) & 0x33333333UL); + x = (x + (x >> 4)) & 0x0F0F0F0FUL; + x = x + (x >> 8); + x = x + (x >> 16); + return (x & 0x3F); +#elif defined(_LP64) + x = x - ((x >> 1) & 0x5555555555555555ULL); + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + x = x + (x >> 8); + x = x + (x >> 16); + x = x + (x >> 32); + return (x & 0x7F); +#else +/* This should be a #warning but for now ignore error. Err: "need td_popc() implementation" */ +#endif +} + +/* + * dt_popcb() is a bitmap-based version of population count that returns the + * number of one bits in the specified bitmap 'bp' at bit positions below 'n'. + */ +ulong_t +dt_popcb(const ulong_t *bp, ulong_t n) +{ + ulong_t maxb = n & BT_ULMASK; + ulong_t maxw = n >> BT_ULSHIFT; + ulong_t w, popc = 0; + + if (n == 0) + return (0); + + for (w = 0; w < maxw; w++) + popc += dt_popc(bp[w]); + + return (popc + dt_popc(bp[maxw] & ((1UL << maxb) - 1))); +} + +#if defined(sun) +struct _rwlock; +struct _lwp_mutex; + +int +dt_rw_read_held(pthread_rwlock_t *lock) +{ + extern int _rw_read_held(struct _rwlock *); + return (_rw_read_held((struct _rwlock *)lock)); +} + +int +dt_rw_write_held(pthread_rwlock_t *lock) +{ + extern int _rw_write_held(struct _rwlock *); + return (_rw_write_held((struct _rwlock *)lock)); +} +#endif + +int +dt_mutex_held(pthread_mutex_t *lock) +{ +#if defined(sun) + extern int _mutex_held(struct _lwp_mutex *); + return (_mutex_held((struct _lwp_mutex *)lock)); +#else + return (1); +#endif +} + +static int +dt_string2str(char *s, char *str, int nbytes) +{ + int len = strlen(s); + + if (nbytes == 0) { + /* + * Like snprintf(3C), we don't check the value of str if the + * number of bytes is 0. 
+ */ + return (len); + } + + if (nbytes <= len) { + (void) strncpy(str, s, nbytes - 1); + /* + * Like snprintf(3C) (and unlike strncpy(3C)), we guarantee + * that the string is null-terminated. + */ + str[nbytes - 1] = '\0'; + } else { + (void) strcpy(str, s); + } + + return (len); +} + +int +dtrace_addr2str(dtrace_hdl_t *dtp, uint64_t addr, char *str, int nbytes) +{ + dtrace_syminfo_t dts; + GElf_Sym sym; + + size_t n = 20; /* for 0x%llx\0 */ + char *s; + int err; + + if ((err = dtrace_lookup_by_addr(dtp, addr, &sym, &dts)) == 0) + n += strlen(dts.dts_object) + strlen(dts.dts_name) + 2; /* +` */ + + s = alloca(n); + + if (err == 0 && addr != sym.st_value) { + (void) snprintf(s, n, "%s`%s+0x%llx", dts.dts_object, + dts.dts_name, (u_longlong_t)addr - sym.st_value); + } else if (err == 0) { + (void) snprintf(s, n, "%s`%s", + dts.dts_object, dts.dts_name); + } else { + /* + * We'll repeat the lookup, but this time we'll specify a NULL + * GElf_Sym -- indicating that we're only interested in the + * containing module. 
+ */ + if (dtrace_lookup_by_addr(dtp, addr, NULL, &dts) == 0) { + (void) snprintf(s, n, "%s`0x%llx", dts.dts_object, + (u_longlong_t)addr); + } else { + (void) snprintf(s, n, "0x%llx", (u_longlong_t)addr); + } + } + + return (dt_string2str(s, str, nbytes)); +} + +int +dtrace_uaddr2str(dtrace_hdl_t *dtp, pid_t pid, + uint64_t addr, char *str, int nbytes) +{ + char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2]; + struct ps_prochandle *P = NULL; + GElf_Sym sym; + char *obj; + + if (pid != 0) + P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0); + + if (P == NULL) { + (void) snprintf(c, sizeof (c), "0x%jx", (uintmax_t)addr); + return (dt_string2str(c, str, nbytes)); + } + + dt_proc_lock(dtp, P); + + if (Plookup_by_addr(P, addr, name, sizeof (name), &sym) == 0) { + (void) Pobjname(P, addr, objname, sizeof (objname)); + + obj = dt_basename(objname); + + if (addr > sym.st_value) { + (void) snprintf(c, sizeof (c), "%s`%s+0x%llx", obj, + name, (u_longlong_t)(addr - sym.st_value)); + } else { + (void) snprintf(c, sizeof (c), "%s`%s", obj, name); + } + } else if (Pobjname(P, addr, objname, sizeof (objname)) != 0) { + (void) snprintf(c, sizeof (c), "%s`0x%jx", + dt_basename(objname), (uintmax_t)addr); + } else { + (void) snprintf(c, sizeof (c), "0x%jx", (uintmax_t)addr); + } + + dt_proc_unlock(dtp, P); + dt_proc_release(dtp, P); + + return (dt_string2str(c, str, nbytes)); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_work.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_work.c new file mode 100644 index 0000000..d19fb88 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_work.c @@ -0,0 +1,320 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <dt_impl.h> +#include <stddef.h> +#include <errno.h> +#include <assert.h> +#include <time.h> + +static const struct { + int dtslt_option; + size_t dtslt_offs; +} _dtrace_sleeptab[] = { + { DTRACEOPT_STATUSRATE, offsetof(dtrace_hdl_t, dt_laststatus) }, + { DTRACEOPT_AGGRATE, offsetof(dtrace_hdl_t, dt_lastagg) }, + { DTRACEOPT_SWITCHRATE, offsetof(dtrace_hdl_t, dt_lastswitch) }, + { DTRACEOPT_MAX, 0 } +}; + +void +dtrace_sleep(dtrace_hdl_t *dtp) +{ + dt_proc_hash_t *dph = dtp->dt_procs; + dtrace_optval_t policy = dtp->dt_options[DTRACEOPT_BUFPOLICY]; + dt_proc_notify_t *dprn; + + hrtime_t earliest = INT64_MAX; + struct timespec tv; + hrtime_t now; + int i; + + for (i = 0; _dtrace_sleeptab[i].dtslt_option < DTRACEOPT_MAX; i++) { + uintptr_t a = (uintptr_t)dtp + _dtrace_sleeptab[i].dtslt_offs; + int opt = _dtrace_sleeptab[i].dtslt_option; + dtrace_optval_t interval = dtp->dt_options[opt]; + + /* + * If the buffering policy is set to anything other than + * "switch", we ignore the aggrate and switchrate -- they're + * meaningless. 
+ */ + if (policy != DTRACEOPT_BUFPOLICY_SWITCH && + _dtrace_sleeptab[i].dtslt_option != DTRACEOPT_STATUSRATE) + continue; + + if (*((hrtime_t *)a) + interval < earliest) + earliest = *((hrtime_t *)a) + interval; + } + + (void) pthread_mutex_lock(&dph->dph_lock); + + now = gethrtime(); + + if (earliest < now) { + (void) pthread_mutex_unlock(&dph->dph_lock); + return; /* sleep duration has already past */ + } + +#if defined(sun) + tv.tv_sec = (earliest - now) / NANOSEC; + tv.tv_nsec = (earliest - now) % NANOSEC; + + /* + * Wait for either 'tv' nanoseconds to pass or to receive notification + * that a process is in an interesting state. Regardless of why we + * awaken, iterate over any pending notifications and process them. + */ + (void) pthread_cond_reltimedwait_np(&dph->dph_cv, &dph->dph_lock, &tv); +#else + earliest -= now; + clock_gettime(CLOCK_REALTIME,&tv); + tv.tv_sec += earliest / NANOSEC; + tv.tv_nsec += earliest % NANOSEC; + while (tv.tv_nsec > NANOSEC) { + tv.tv_sec += 1; + tv.tv_nsec -= NANOSEC; + } + + /* + * Wait for either 'tv' nanoseconds to pass or to receive notification + * that a process is in an interesting state. Regardless of why we + * awaken, iterate over any pending notifications and process them. 
+ */ + (void) pthread_cond_timedwait(&dph->dph_cv, &dph->dph_lock, &tv); +#endif + + while ((dprn = dph->dph_notify) != NULL) { + if (dtp->dt_prochdlr != NULL) { + char *err = dprn->dprn_errmsg; + if (*err == '\0') + err = NULL; + + dtp->dt_prochdlr(dprn->dprn_dpr->dpr_proc, err, + dtp->dt_procarg); + } + + dph->dph_notify = dprn->dprn_next; + dt_free(dtp, dprn); + } + + (void) pthread_mutex_unlock(&dph->dph_lock); +} + +int +dtrace_status(dtrace_hdl_t *dtp) +{ + int gen = dtp->dt_statusgen; + dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_STATUSRATE]; + hrtime_t now = gethrtime(); + + if (!dtp->dt_active) + return (DTRACE_STATUS_NONE); + + if (dtp->dt_stopped) + return (DTRACE_STATUS_STOPPED); + + if (dtp->dt_laststatus != 0) { + if (now - dtp->dt_laststatus < interval) + return (DTRACE_STATUS_NONE); + + dtp->dt_laststatus += interval; + } else { + dtp->dt_laststatus = now; + } + + if (dt_ioctl(dtp, DTRACEIOC_STATUS, &dtp->dt_status[gen]) == -1) + return (dt_set_errno(dtp, errno)); + + dtp->dt_statusgen ^= 1; + + if (dt_handle_status(dtp, &dtp->dt_status[dtp->dt_statusgen], + &dtp->dt_status[gen]) == -1) + return (-1); + + if (dtp->dt_status[gen].dtst_exiting) { + if (!dtp->dt_stopped) + (void) dtrace_stop(dtp); + + return (DTRACE_STATUS_EXITED); + } + + if (dtp->dt_status[gen].dtst_filled == 0) + return (DTRACE_STATUS_OKAY); + + if (dtp->dt_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) + return (DTRACE_STATUS_OKAY); + + if (!dtp->dt_stopped) { + if (dtrace_stop(dtp) == -1) + return (-1); + } + + return (DTRACE_STATUS_FILLED); +} + +int +dtrace_go(dtrace_hdl_t *dtp) +{ + dtrace_enable_io_t args; + void *dof; + int error, r; + + if (dtp->dt_active) + return (dt_set_errno(dtp, EINVAL)); + + /* + * If a dtrace:::ERROR program and callback are registered, enable the + * program before we start tracing. 
If this fails for a vector open + * with ENOTTY, we permit dtrace_go() to succeed so that vector clients + * such as mdb's dtrace module can execute the rest of dtrace_go() even + * though they do not provide support for the DTRACEIOC_ENABLE ioctl. + */ + if (dtp->dt_errprog != NULL && + dtrace_program_exec(dtp, dtp->dt_errprog, NULL) == -1 && ( + dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL)) + return (-1); /* dt_errno has been set for us */ + + if ((dof = dtrace_getopt_dof(dtp)) == NULL) + return (-1); /* dt_errno has been set for us */ + + args.dof = dof; + args.n_matched = 0; + r = dt_ioctl(dtp, DTRACEIOC_ENABLE, &args); + error = errno; + dtrace_dof_destroy(dtp, dof); + + if (r == -1 && (error != ENOTTY || dtp->dt_vector == NULL)) + return (dt_set_errno(dtp, error)); + + if (dt_ioctl(dtp, DTRACEIOC_GO, &dtp->dt_beganon) == -1) { + if (errno == EACCES) + return (dt_set_errno(dtp, EDT_DESTRUCTIVE)); + + if (errno == EALREADY) + return (dt_set_errno(dtp, EDT_ISANON)); + + if (errno == ENOENT) + return (dt_set_errno(dtp, EDT_NOANON)); + + if (errno == E2BIG) + return (dt_set_errno(dtp, EDT_ENDTOOBIG)); + + if (errno == ENOSPC) + return (dt_set_errno(dtp, EDT_BUFTOOSMALL)); + + return (dt_set_errno(dtp, errno)); + } + + dtp->dt_active = 1; + + if (dt_options_load(dtp) == -1) + return (dt_set_errno(dtp, errno)); + + return (dt_aggregate_go(dtp)); +} + +int +dtrace_stop(dtrace_hdl_t *dtp) +{ + int gen = dtp->dt_statusgen; + + if (dtp->dt_stopped) + return (0); + + if (dt_ioctl(dtp, DTRACEIOC_STOP, &dtp->dt_endedon) == -1) + return (dt_set_errno(dtp, errno)); + + dtp->dt_stopped = 1; + + /* + * Now that we're stopped, we're going to get status one final time. 
+ */ + if (dt_ioctl(dtp, DTRACEIOC_STATUS, &dtp->dt_status[gen]) == -1) + return (dt_set_errno(dtp, errno)); + + if (dt_handle_status(dtp, &dtp->dt_status[gen ^ 1], + &dtp->dt_status[gen]) == -1) + return (-1); + + return (0); +} + + +dtrace_workstatus_t +dtrace_work(dtrace_hdl_t *dtp, FILE *fp, + dtrace_consume_probe_f *pfunc, dtrace_consume_rec_f *rfunc, void *arg) +{ + int status = dtrace_status(dtp); + dtrace_optval_t policy = dtp->dt_options[DTRACEOPT_BUFPOLICY]; + dtrace_workstatus_t rval; + + switch (status) { + case DTRACE_STATUS_EXITED: + case DTRACE_STATUS_FILLED: + case DTRACE_STATUS_STOPPED: + /* + * Tracing is stopped. We now want to force dtrace_consume() + * and dtrace_aggregate_snap() to proceed, regardless of + * switchrate and aggrate. We do this by clearing the times. + */ + dtp->dt_lastswitch = 0; + dtp->dt_lastagg = 0; + rval = DTRACE_WORKSTATUS_DONE; + break; + + case DTRACE_STATUS_NONE: + case DTRACE_STATUS_OKAY: + rval = DTRACE_WORKSTATUS_OKAY; + break; + + case -1: + return (DTRACE_WORKSTATUS_ERROR); + } + + if ((status == DTRACE_STATUS_NONE || status == DTRACE_STATUS_OKAY) && + policy != DTRACEOPT_BUFPOLICY_SWITCH) { + /* + * There either isn't any status or things are fine -- and + * this is a "ring" or "fill" buffer. We don't want to consume + * any of the trace data or snapshot the aggregations; we just + * return. 
+ */ + assert(rval == DTRACE_WORKSTATUS_OKAY); + return (rval); + } + + if (dtrace_aggregate_snap(dtp) == -1) + return (DTRACE_WORKSTATUS_ERROR); + + if (dtrace_consume(dtp, fp, pfunc, rfunc, arg) == -1) + return (DTRACE_WORKSTATUS_ERROR); + + return (rval); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_xlator.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_xlator.c new file mode 100644 index 0000000..74bd487 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_xlator.c @@ -0,0 +1,386 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. + */ + +#include <strings.h> +#include <assert.h> + +#include <dt_xlator.h> +#include <dt_parser.h> +#include <dt_grammar.h> +#include <dt_module.h> +#include <dt_impl.h> + +/* + * Create a member node corresponding to one of the output members of a dynamic + * translator. 
We set the member's dn_membexpr to a DT_NODE_XLATOR node that + * has dn_op set to DT_TOK_XLATE and refers back to the translator itself. The + * code generator will then use this as the indicator for dynamic translation. + */ +/*ARGSUSED*/ +static int +dt_xlator_create_member(const char *name, ctf_id_t type, ulong_t off, void *arg) +{ + dt_xlator_t *dxp = arg; + dtrace_hdl_t *dtp = dxp->dx_hdl; + dt_node_t *enp, *mnp; + + if ((enp = dt_node_xalloc(dtp, DT_NODE_XLATOR)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + enp->dn_link = dxp->dx_nodes; + dxp->dx_nodes = enp; + + if ((mnp = dt_node_xalloc(dtp, DT_NODE_MEMBER)) == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + mnp->dn_link = dxp->dx_nodes; + dxp->dx_nodes = mnp; + + /* + * For the member expression, we use a DT_NODE_XLATOR/TOK_XLATE whose + * xlator refers back to the translator and whose dn_xmember refers to + * the current member. These refs will be used by dt_cg.c and dt_as.c. + */ + enp->dn_op = DT_TOK_XLATE; + enp->dn_xlator = dxp; + enp->dn_xmember = mnp; + dt_node_type_assign(enp, dxp->dx_dst_ctfp, type, B_FALSE); + + /* + * For the member itself, we use a DT_NODE_MEMBER as usual with the + * appropriate name, output type, and member expression set to 'enp'. 
+ */ + if (dxp->dx_members != NULL) { + assert(enp->dn_link->dn_kind == DT_NODE_MEMBER); + enp->dn_link->dn_list = mnp; + } else + dxp->dx_members = mnp; + + mnp->dn_membname = strdup(name); + mnp->dn_membexpr = enp; + dt_node_type_assign(mnp, dxp->dx_dst_ctfp, type, B_FALSE); + + if (mnp->dn_membname == NULL) + return (dt_set_errno(dtp, EDT_NOMEM)); + + return (0); +} + +dt_xlator_t * +dt_xlator_create(dtrace_hdl_t *dtp, + const dtrace_typeinfo_t *src, const dtrace_typeinfo_t *dst, + const char *name, dt_node_t *members, dt_node_t *nodes) +{ + dt_xlator_t *dxp = dt_zalloc(dtp, sizeof (dt_xlator_t)); + dtrace_typeinfo_t ptr = *dst; + dt_xlator_t **map; + dt_node_t *dnp; + uint_t kind; + + if (dxp == NULL) + return (NULL); + + dxp->dx_hdl = dtp; + dxp->dx_id = dtp->dt_xlatorid++; + dxp->dx_gen = dtp->dt_gen; + dxp->dx_arg = -1; + + if ((map = dt_alloc(dtp, sizeof (void *) * (dxp->dx_id + 1))) == NULL) { + dt_free(dtp, dxp); + return (NULL); + } + + dt_list_append(&dtp->dt_xlators, dxp); + bcopy(dtp->dt_xlatormap, map, sizeof (void *) * dxp->dx_id); + dt_free(dtp, dtp->dt_xlatormap); + dtp->dt_xlatormap = map; + dtp->dt_xlatormap[dxp->dx_id] = dxp; + + if (dt_type_pointer(&ptr) == -1) { + ptr.dtt_ctfp = NULL; + ptr.dtt_type = CTF_ERR; + } + + dxp->dx_ident = dt_ident_create(name ? name : "T", + DT_IDENT_SCALAR, DT_IDFLG_REF | DT_IDFLG_ORPHAN, 0, + _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen); + + if (dxp->dx_ident == NULL) + goto err; /* no memory for identifier */ + + dxp->dx_ident->di_ctfp = src->dtt_ctfp; + dxp->dx_ident->di_type = src->dtt_type; + + /* + * If an input parameter name is given, this is a static translator + * definition: create an idhash and identifier for the parameter. 
+ */ + if (name != NULL) { + dxp->dx_locals = dt_idhash_create("xlparams", NULL, 0, 0); + + if (dxp->dx_locals == NULL) + goto err; /* no memory for identifier hash */ + + dt_idhash_xinsert(dxp->dx_locals, dxp->dx_ident); + } + + dxp->dx_souid.di_name = "translator"; + dxp->dx_souid.di_kind = DT_IDENT_XLSOU; + dxp->dx_souid.di_flags = DT_IDFLG_REF; + dxp->dx_souid.di_id = dxp->dx_id; + dxp->dx_souid.di_attr = _dtrace_defattr; + dxp->dx_souid.di_ops = &dt_idops_thaw; + dxp->dx_souid.di_data = dxp; + dxp->dx_souid.di_ctfp = dst->dtt_ctfp; + dxp->dx_souid.di_type = dst->dtt_type; + dxp->dx_souid.di_gen = dtp->dt_gen; + + dxp->dx_ptrid.di_name = "translator"; + dxp->dx_ptrid.di_kind = DT_IDENT_XLPTR; + dxp->dx_ptrid.di_flags = DT_IDFLG_REF; + dxp->dx_ptrid.di_id = dxp->dx_id; + dxp->dx_ptrid.di_attr = _dtrace_defattr; + dxp->dx_ptrid.di_ops = &dt_idops_thaw; + dxp->dx_ptrid.di_data = dxp; + dxp->dx_ptrid.di_ctfp = ptr.dtt_ctfp; + dxp->dx_ptrid.di_type = ptr.dtt_type; + dxp->dx_ptrid.di_gen = dtp->dt_gen; + + /* + * If a deferred pragma is pending on the keyword "translator", run all + * the deferred pragmas on dx_souid and then copy results to dx_ptrid. + * See the code in dt_pragma.c for details on deferred ident pragmas. 
+ */ + if (dtp->dt_globals->dh_defer != NULL && yypcb->pcb_pragmas != NULL && + dt_idhash_lookup(yypcb->pcb_pragmas, "translator") != NULL) { + dtp->dt_globals->dh_defer(dtp->dt_globals, &dxp->dx_souid); + dxp->dx_ptrid.di_attr = dxp->dx_souid.di_attr; + dxp->dx_ptrid.di_vers = dxp->dx_souid.di_vers; + } + + dxp->dx_src_ctfp = src->dtt_ctfp; + dxp->dx_src_type = src->dtt_type; + dxp->dx_src_base = ctf_type_resolve(src->dtt_ctfp, src->dtt_type); + + dxp->dx_dst_ctfp = dst->dtt_ctfp; + dxp->dx_dst_type = dst->dtt_type; + dxp->dx_dst_base = ctf_type_resolve(dst->dtt_ctfp, dst->dtt_type); + + kind = ctf_type_kind(dst->dtt_ctfp, dxp->dx_dst_base); + assert(kind == CTF_K_STRUCT || kind == CTF_K_UNION); + + /* + * If no input parameter is given, we're making a dynamic translator: + * create member nodes for every member of the output type. Otherwise + * retain the member and allocation node lists presented by the parser. + */ + if (name == NULL) { + if (ctf_member_iter(dxp->dx_dst_ctfp, dxp->dx_dst_base, + dt_xlator_create_member, dxp) != 0) + goto err; + } else { + dxp->dx_members = members; + dxp->dx_nodes = nodes; + } + + /* + * Assign member IDs to each member and allocate space for DIFOs + * if and when this translator is eventually compiled. 
+ */ + for (dnp = dxp->dx_members; dnp != NULL; dnp = dnp->dn_list) { + dnp->dn_membxlator = dxp; + dnp->dn_membid = dxp->dx_nmembers++; + } + + dxp->dx_membdif = dt_zalloc(dtp, + sizeof (dtrace_difo_t *) * dxp->dx_nmembers); + + if (dxp->dx_membdif == NULL) { + dxp->dx_nmembers = 0; + goto err; + } + + return (dxp); + +err: + dt_xlator_destroy(dtp, dxp); + return (NULL); +} + +void +dt_xlator_destroy(dtrace_hdl_t *dtp, dt_xlator_t *dxp) +{ + uint_t i; + + dt_node_link_free(&dxp->dx_nodes); + + if (dxp->dx_locals != NULL) + dt_idhash_destroy(dxp->dx_locals); + else if (dxp->dx_ident != NULL) + dt_ident_destroy(dxp->dx_ident); + + for (i = 0; i < dxp->dx_nmembers; i++) + dt_difo_free(dtp, dxp->dx_membdif[i]); + + dt_free(dtp, dxp->dx_membdif); + dt_list_delete(&dtp->dt_xlators, dxp); + dt_free(dtp, dxp); +} + +dt_xlator_t * +dt_xlator_lookup(dtrace_hdl_t *dtp, dt_node_t *src, dt_node_t *dst, int flags) +{ + ctf_file_t *src_ctfp = src->dn_ctfp; + ctf_id_t src_type = src->dn_type; + ctf_id_t src_base = ctf_type_resolve(src_ctfp, src_type); + + ctf_file_t *dst_ctfp = dst->dn_ctfp; + ctf_id_t dst_type = dst->dn_type; + ctf_id_t dst_base = ctf_type_resolve(dst_ctfp, dst_type); + uint_t dst_kind = ctf_type_kind(dst_ctfp, dst_base); + + int ptr = dst_kind == CTF_K_POINTER; + dtrace_typeinfo_t src_dtt, dst_dtt; + dt_node_t xn = { 0 }; + dt_xlator_t *dxp = NULL; + + if (src_base == CTF_ERR || dst_base == CTF_ERR) + return (NULL); /* fail if these are unresolvable types */ + + /* + * Translators are always defined using a struct or union type, so if + * we are attempting to translate to type "T *", we internally look + * for a translation to type "T" by following the pointer reference. 
+ */ + if (ptr) { + dst_type = ctf_type_reference(dst_ctfp, dst_type); + dst_base = ctf_type_resolve(dst_ctfp, dst_type); + dst_kind = ctf_type_kind(dst_ctfp, dst_base); + } + + if (dst_kind != CTF_K_UNION && dst_kind != CTF_K_STRUCT) + return (NULL); /* fail if the output isn't a struct or union */ + + /* + * In order to find a matching translator, we iterate over the set of + * available translators in three passes. First, we look for a + * translation from the exact source type to the resolved destination. + * Second, we look for a translation from the resolved source type to + * the resolved destination. Third, we look for a translation from a + * compatible source type (using the same rules as parameter formals) + * to the resolved destination. If all passes fail, return NULL. + */ + for (dxp = dt_list_next(&dtp->dt_xlators); dxp != NULL; + dxp = dt_list_next(dxp)) { + if (ctf_type_compat(dxp->dx_src_ctfp, dxp->dx_src_type, + src_ctfp, src_type) && + ctf_type_compat(dxp->dx_dst_ctfp, dxp->dx_dst_base, + dst_ctfp, dst_base)) + goto out; + } + + if (flags & DT_XLATE_EXACT) + goto out; /* skip remaining passes if exact match required */ + + for (dxp = dt_list_next(&dtp->dt_xlators); dxp != NULL; + dxp = dt_list_next(dxp)) { + if (ctf_type_compat(dxp->dx_src_ctfp, dxp->dx_src_base, + src_ctfp, src_type) && + ctf_type_compat(dxp->dx_dst_ctfp, dxp->dx_dst_base, + dst_ctfp, dst_base)) + goto out; + } + + for (dxp = dt_list_next(&dtp->dt_xlators); dxp != NULL; + dxp = dt_list_next(dxp)) { + dt_node_type_assign(&xn, dxp->dx_src_ctfp, dxp->dx_src_type, + B_FALSE); + if (ctf_type_compat(dxp->dx_dst_ctfp, dxp->dx_dst_base, + dst_ctfp, dst_base) && dt_node_is_argcompat(src, &xn)) + goto out; + } + +out: + if (ptr && dxp != NULL && dxp->dx_ptrid.di_type == CTF_ERR) + return (NULL); /* no translation available to pointer type */ + + if (dxp != NULL || !(flags & DT_XLATE_EXTERN) || + dtp->dt_xlatemode == DT_XL_STATIC) + return (dxp); /* we succeeded or not allowed to extern 
*/ + + /* + * If we get here, then we didn't find an existing translator, but the + * caller and xlatemode permit us to create an extern to a dynamic one. + */ + src_dtt.dtt_object = dt_module_lookup_by_ctf(dtp, src_ctfp)->dm_name; + src_dtt.dtt_ctfp = src_ctfp; + src_dtt.dtt_type = src_type; + + dst_dtt.dtt_object = dt_module_lookup_by_ctf(dtp, dst_ctfp)->dm_name; + dst_dtt.dtt_ctfp = dst_ctfp; + dst_dtt.dtt_type = dst_type; + + return (dt_xlator_create(dtp, &src_dtt, &dst_dtt, NULL, NULL, NULL)); +} + +dt_xlator_t * +dt_xlator_lookup_id(dtrace_hdl_t *dtp, id_t id) +{ + assert(id >= 0 && id < dtp->dt_xlatorid); + return (dtp->dt_xlatormap[id]); +} + +dt_ident_t * +dt_xlator_ident(dt_xlator_t *dxp, ctf_file_t *ctfp, ctf_id_t type) +{ + if (ctf_type_kind(ctfp, ctf_type_resolve(ctfp, type)) == CTF_K_POINTER) + return (&dxp->dx_ptrid); + else + return (&dxp->dx_souid); +} + +dt_node_t * +dt_xlator_member(dt_xlator_t *dxp, const char *name) +{ + dt_node_t *dnp; + + for (dnp = dxp->dx_members; dnp != NULL; dnp = dnp->dn_list) { + if (strcmp(dnp->dn_membname, name) == 0) + return (dnp); + } + + return (NULL); +} + +int +dt_xlator_dynamic(const dt_xlator_t *dxp) +{ + return (dxp->dx_locals == NULL); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_xlator.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_xlator.h new file mode 100644 index 0000000..a30f3af --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_xlator.h @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DT_XLATOR_H +#define _DT_XLATOR_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libctf.h> +#include <dtrace.h> +#include <dt_ident.h> +#include <dt_list.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dt_node; + +typedef struct dt_xlator { + dt_list_t dx_list; /* list forward/back pointers */ + dt_idhash_t *dx_locals; /* hash of local scope identifiers */ + dt_ident_t *dx_ident; /* identifier ref for input param */ + dt_ident_t dx_souid; /* fake identifier for sou output */ + dt_ident_t dx_ptrid; /* fake identifier for ptr output */ + ctf_file_t *dx_src_ctfp; /* CTF container for input type */ + ctf_id_t dx_src_type; /* CTF reference for input type */ + ctf_id_t dx_src_base; /* CTF reference for input base */ + ctf_file_t *dx_dst_ctfp; /* CTF container for output type */ + ctf_id_t dx_dst_type; /* CTF reference for output type */ + ctf_id_t dx_dst_base; /* CTF reference for output base */ + struct dt_node *dx_members; /* list of member translations */ + uint_t dx_nmembers; /* length of dx_members list */ + dtrace_difo_t **dx_membdif; /* DIF for member expressions */ + struct dt_node *dx_nodes; /* list of parse tree nodes */ + dtrace_hdl_t *dx_hdl; /* back pointer to containing handle */ + ulong_t dx_gen; /* generation number that created me */ + id_t dx_id; /* global translator id */ + int dx_arg; /* dynamic argument index */ +} dt_xlator_t; + +extern dt_xlator_t *dt_xlator_create(dtrace_hdl_t *, + const dtrace_typeinfo_t *, const dtrace_typeinfo_t *, + 
const char *, struct dt_node *, struct dt_node *); + +extern void dt_xlator_destroy(dtrace_hdl_t *, dt_xlator_t *); + +#define DT_XLATE_FUZZY 0x0 /* lookup any matching translator */ +#define DT_XLATE_EXACT 0x1 /* lookup only exact type matches */ +#define DT_XLATE_EXTERN 0x2 /* extern translator if none exists */ + +extern dt_xlator_t *dt_xlator_lookup(dtrace_hdl_t *, + struct dt_node *, struct dt_node *, int); + +extern dt_xlator_t *dt_xlator_lookup_id(dtrace_hdl_t *, id_t); +extern dt_ident_t *dt_xlator_ident(dt_xlator_t *, ctf_file_t *, ctf_id_t); +extern struct dt_node *dt_xlator_member(dt_xlator_t *, const char *); +extern int dt_xlator_dynamic(const dt_xlator_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DT_XLATOR_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dtrace.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dtrace.h new file mode 100644 index 0000000..6f88e6d --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dtrace.h @@ -0,0 +1,613 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2013 by Delphix. 
All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#ifndef _DTRACE_H +#define _DTRACE_H + +#include <sys/dtrace.h> +#include <stdarg.h> +#include <stdio.h> +#include <gelf.h> +#include <libproc.h> +#if !defined(sun) +#include <rtld_db.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * DTrace Dynamic Tracing Software: Library Interfaces + * + * Note: The contents of this file are private to the implementation of the + * Solaris system and DTrace subsystem and are subject to change at any time + * without notice. Applications and drivers using these interfaces will fail + * to run on future releases. These interfaces should not be used for any + * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). + * Please refer to the "Solaris Dynamic Tracing Guide" for more information. + */ + +#define DTRACE_VERSION 3 /* library ABI interface version */ + +struct ps_prochandle; +typedef struct dtrace_hdl dtrace_hdl_t; +typedef struct dtrace_prog dtrace_prog_t; +typedef struct dtrace_vector dtrace_vector_t; +typedef struct dtrace_aggdata dtrace_aggdata_t; + +#define DTRACE_O_NODEV 0x01 /* do not open dtrace(7D) device */ +#define DTRACE_O_NOSYS 0x02 /* do not load /system/object modules */ +#define DTRACE_O_LP64 0x04 /* force D compiler to be LP64 */ +#define DTRACE_O_ILP32 0x08 /* force D compiler to be ILP32 */ +#define DTRACE_O_MASK 0x0f /* mask of valid flags to dtrace_open */ + +extern dtrace_hdl_t *dtrace_open(int, int, int *); +extern dtrace_hdl_t *dtrace_vopen(int, int, int *, + const dtrace_vector_t *, void *); + +extern int dtrace_go(dtrace_hdl_t *); +extern int dtrace_stop(dtrace_hdl_t *); +extern void dtrace_sleep(dtrace_hdl_t *); +extern void dtrace_close(dtrace_hdl_t *); + +extern int dtrace_errno(dtrace_hdl_t *); +extern const char *dtrace_errmsg(dtrace_hdl_t *, int); +extern const char *dtrace_faultstr(dtrace_hdl_t *, int); +extern const char *dtrace_subrstr(dtrace_hdl_t *, int); + +extern 
int dtrace_setopt(dtrace_hdl_t *, const char *, const char *); +extern int dtrace_getopt(dtrace_hdl_t *, const char *, dtrace_optval_t *); + +extern void dtrace_update(dtrace_hdl_t *); +extern int dtrace_ctlfd(dtrace_hdl_t *); + +/* + * DTrace Program Interface + * + * DTrace programs can be created by compiling ASCII text files containing + * D programs or by compiling in-memory C strings that specify a D program. + * Once created, callers can examine the list of program statements and + * enable the probes and actions described by these statements. + */ + +typedef struct dtrace_proginfo { + dtrace_attribute_t dpi_descattr; /* minimum probedesc attributes */ + dtrace_attribute_t dpi_stmtattr; /* minimum statement attributes */ + uint_t dpi_aggregates; /* number of aggregates specified in program */ + uint_t dpi_recgens; /* number of record generating probes in prog */ + uint_t dpi_matches; /* number of probes matched by program */ + uint_t dpi_speculations; /* number of speculations specified in prog */ +} dtrace_proginfo_t; + +#define DTRACE_C_DIFV 0x0001 /* DIF verbose mode: show each compiled DIFO */ +#define DTRACE_C_EMPTY 0x0002 /* Permit compilation of empty D source files */ +#define DTRACE_C_ZDEFS 0x0004 /* Permit probe defs that match zero probes */ +#define DTRACE_C_EATTR 0x0008 /* Error if program attributes less than min */ +#define DTRACE_C_CPP 0x0010 /* Preprocess input file with cpp(1) utility */ +#define DTRACE_C_KNODEF 0x0020 /* Permit unresolved kernel symbols in DIFO */ +#define DTRACE_C_UNODEF 0x0040 /* Permit unresolved user symbols in DIFO */ +#define DTRACE_C_PSPEC 0x0080 /* Interpret ambiguous specifiers as probes */ +#define DTRACE_C_ETAGS 0x0100 /* Prefix error messages with error tags */ +#define DTRACE_C_ARGREF 0x0200 /* Do not require all macro args to be used */ +#define DTRACE_C_DEFARG 0x0800 /* Use 0/"" as value for unspecified args */ +#define DTRACE_C_NOLIBS 0x1000 /* Do not process D system libraries */ +#define DTRACE_C_CTL 
0x2000 /* Only process control directives */ +#define DTRACE_C_MASK 0x3bff /* mask of all valid flags to dtrace_*compile */ + +extern dtrace_prog_t *dtrace_program_strcompile(dtrace_hdl_t *, + const char *, dtrace_probespec_t, uint_t, int, char *const []); + +extern dtrace_prog_t *dtrace_program_fcompile(dtrace_hdl_t *, + FILE *, uint_t, int, char *const []); + +extern int dtrace_program_exec(dtrace_hdl_t *, dtrace_prog_t *, + dtrace_proginfo_t *); +extern void dtrace_program_info(dtrace_hdl_t *, dtrace_prog_t *, + dtrace_proginfo_t *); + +#define DTRACE_D_STRIP 0x01 /* strip non-loadable sections from program */ +#define DTRACE_D_PROBES 0x02 /* include provider and probe definitions */ +#define DTRACE_D_MASK 0x03 /* mask of valid flags to dtrace_dof_create */ + +extern int dtrace_program_link(dtrace_hdl_t *, dtrace_prog_t *, + uint_t, const char *, int, char *const []); + +extern int dtrace_program_header(dtrace_hdl_t *, FILE *, const char *); + +extern void *dtrace_dof_create(dtrace_hdl_t *, dtrace_prog_t *, uint_t); +extern void dtrace_dof_destroy(dtrace_hdl_t *, void *); + +extern void *dtrace_getopt_dof(dtrace_hdl_t *); +extern void *dtrace_geterr_dof(dtrace_hdl_t *); + +typedef struct dtrace_stmtdesc { + dtrace_ecbdesc_t *dtsd_ecbdesc; /* ECB description */ + dtrace_actdesc_t *dtsd_action; /* action list */ + dtrace_actdesc_t *dtsd_action_last; /* last action in action list */ + void *dtsd_aggdata; /* aggregation data */ + void *dtsd_fmtdata; /* type-specific output data */ + void *dtsd_strdata; /* type-specific string data */ + void (*dtsd_callback)(void); /* callback function for EPID */ + void *dtsd_data; /* callback data pointer */ + dtrace_attribute_t dtsd_descattr; /* probedesc attributes */ + dtrace_attribute_t dtsd_stmtattr; /* statement attributes */ +} dtrace_stmtdesc_t; + +typedef int dtrace_stmt_f(dtrace_hdl_t *, dtrace_prog_t *, + dtrace_stmtdesc_t *, void *); + +extern dtrace_stmtdesc_t *dtrace_stmt_create(dtrace_hdl_t *, + dtrace_ecbdesc_t *); 
+extern dtrace_actdesc_t *dtrace_stmt_action(dtrace_hdl_t *, + dtrace_stmtdesc_t *); +extern int dtrace_stmt_add(dtrace_hdl_t *, dtrace_prog_t *, + dtrace_stmtdesc_t *); +extern int dtrace_stmt_iter(dtrace_hdl_t *, dtrace_prog_t *, + dtrace_stmt_f *, void *); +extern void dtrace_stmt_destroy(dtrace_hdl_t *, dtrace_stmtdesc_t *); + +/* + * DTrace Data Consumption Interface + */ +typedef enum { + DTRACEFLOW_ENTRY, + DTRACEFLOW_RETURN, + DTRACEFLOW_NONE +} dtrace_flowkind_t; + +#define DTRACE_CONSUME_ERROR -1 /* error while processing */ +#define DTRACE_CONSUME_THIS 0 /* consume this probe/record */ +#define DTRACE_CONSUME_NEXT 1 /* advance to next probe/rec */ +#define DTRACE_CONSUME_ABORT 2 /* abort consumption */ + +typedef struct dtrace_probedata { + dtrace_hdl_t *dtpda_handle; /* handle to DTrace library */ + dtrace_eprobedesc_t *dtpda_edesc; /* enabled probe description */ + dtrace_probedesc_t *dtpda_pdesc; /* probe description */ + processorid_t dtpda_cpu; /* CPU for data */ + caddr_t dtpda_data; /* pointer to raw data */ + dtrace_flowkind_t dtpda_flow; /* flow kind */ + const char *dtpda_prefix; /* recommended flow prefix */ + int dtpda_indent; /* recommended flow indent */ +} dtrace_probedata_t; + +typedef int dtrace_consume_probe_f(const dtrace_probedata_t *, void *); +typedef int dtrace_consume_rec_f(const dtrace_probedata_t *, + const dtrace_recdesc_t *, void *); + +extern int dtrace_consume(dtrace_hdl_t *, FILE *, + dtrace_consume_probe_f *, dtrace_consume_rec_f *, void *); + +#define DTRACE_STATUS_NONE 0 /* no status; not yet time */ +#define DTRACE_STATUS_OKAY 1 /* status okay */ +#define DTRACE_STATUS_EXITED 2 /* exit() was called; tracing stopped */ +#define DTRACE_STATUS_FILLED 3 /* fill buffer filled; tracing stopped */ +#define DTRACE_STATUS_STOPPED 4 /* tracing already stopped */ + +extern int dtrace_status(dtrace_hdl_t *); + +/* + * DTrace Formatted Output Interfaces + * + * To format output associated with a given dtrace_stmtdesc, the caller can 
+ * invoke one of the following functions, passing the opaque dtsd_fmtdata and a + * list of record descriptions. These functions return either -1 to indicate + * an error, or a positive integer indicating the number of records consumed. + * For anonymous enablings, the consumer can use the dtrd_format member of + * the record description to obtain a format description. The dtfd_string + * member of the format description may be passed to dtrace_print{fa}_create() + * to create the opaque format data. + */ +extern void *dtrace_printf_create(dtrace_hdl_t *, const char *); +extern void *dtrace_printa_create(dtrace_hdl_t *, const char *); +extern size_t dtrace_printf_format(dtrace_hdl_t *, void *, char *, size_t); + +extern int dtrace_fprintf(dtrace_hdl_t *, FILE *, void *, + const dtrace_probedata_t *, const dtrace_recdesc_t *, uint_t, + const void *, size_t); + +extern int dtrace_fprinta(dtrace_hdl_t *, FILE *, void *, + const dtrace_probedata_t *, const dtrace_recdesc_t *, uint_t, + const void *, size_t); + +extern int dtrace_system(dtrace_hdl_t *, FILE *, void *, + const dtrace_probedata_t *, const dtrace_recdesc_t *, uint_t, + const void *, size_t); + +extern int dtrace_freopen(dtrace_hdl_t *, FILE *, void *, + const dtrace_probedata_t *, const dtrace_recdesc_t *, uint_t, + const void *, size_t); + +/* + * Type-specific output printing + * + * The print() action will associate a string data record that is actually the + * fully-qualified type name of the data traced by the DIFEXPR action. This is + * stored in the same 'format' record from the kernel, but we know by virtue of + * the fact that the action is still DIFEXPR that it is actually a reference to + * plain string data. 
+ */ +extern int dtrace_print(dtrace_hdl_t *, FILE *, const char *, + caddr_t, size_t); + +/* + * DTrace Work Interface + */ +typedef enum { + DTRACE_WORKSTATUS_ERROR = -1, + DTRACE_WORKSTATUS_OKAY, + DTRACE_WORKSTATUS_DONE +} dtrace_workstatus_t; + +extern dtrace_workstatus_t dtrace_work(dtrace_hdl_t *, FILE *, + dtrace_consume_probe_f *, dtrace_consume_rec_f *, void *); + +/* + * DTrace Handler Interface + */ +#define DTRACE_HANDLE_ABORT -1 /* abort current operation */ +#define DTRACE_HANDLE_OK 0 /* handled okay; continue */ + +typedef struct dtrace_errdata { + dtrace_hdl_t *dteda_handle; /* handle to DTrace library */ + dtrace_eprobedesc_t *dteda_edesc; /* enabled probe inducing err */ + dtrace_probedesc_t *dteda_pdesc; /* probe inducing error */ + processorid_t dteda_cpu; /* CPU of error */ + int dteda_action; /* action inducing error */ + int dteda_offset; /* offset in DIFO of error */ + int dteda_fault; /* specific fault */ + uint64_t dteda_addr; /* address of fault, if any */ + const char *dteda_msg; /* preconstructed message */ +} dtrace_errdata_t; + +typedef int dtrace_handle_err_f(const dtrace_errdata_t *, void *); +extern int dtrace_handle_err(dtrace_hdl_t *, dtrace_handle_err_f *, void *); + +typedef enum { + DTRACEDROP_PRINCIPAL, /* drop to principal buffer */ + DTRACEDROP_AGGREGATION, /* drop to aggregation buffer */ + DTRACEDROP_DYNAMIC, /* dynamic drop */ + DTRACEDROP_DYNRINSE, /* dyn drop due to rinsing */ + DTRACEDROP_DYNDIRTY, /* dyn drop due to dirty */ + DTRACEDROP_SPEC, /* speculative drop */ + DTRACEDROP_SPECBUSY, /* spec drop due to busy */ + DTRACEDROP_SPECUNAVAIL, /* spec drop due to unavail */ + DTRACEDROP_STKSTROVERFLOW, /* stack string tab overflow */ + DTRACEDROP_DBLERROR /* error in ERROR probe */ +} dtrace_dropkind_t; + +typedef struct dtrace_dropdata { + dtrace_hdl_t *dtdda_handle; /* handle to DTrace library */ + processorid_t dtdda_cpu; /* CPU, if any */ + dtrace_dropkind_t dtdda_kind; /* kind of drop */ + uint64_t dtdda_drops; 
/* number of drops */ + uint64_t dtdda_total; /* total drops */ + const char *dtdda_msg; /* preconstructed message */ +} dtrace_dropdata_t; + +typedef int dtrace_handle_drop_f(const dtrace_dropdata_t *, void *); +extern int dtrace_handle_drop(dtrace_hdl_t *, dtrace_handle_drop_f *, void *); + +typedef void dtrace_handle_proc_f(struct ps_prochandle *, const char *, void *); +extern int dtrace_handle_proc(dtrace_hdl_t *, dtrace_handle_proc_f *, void *); + +#define DTRACE_BUFDATA_AGGKEY 0x0001 /* aggregation key */ +#define DTRACE_BUFDATA_AGGVAL 0x0002 /* aggregation value */ +#define DTRACE_BUFDATA_AGGFORMAT 0x0004 /* aggregation format data */ +#define DTRACE_BUFDATA_AGGLAST 0x0008 /* last for this key/val */ + +typedef struct dtrace_bufdata { + dtrace_hdl_t *dtbda_handle; /* handle to DTrace library */ + const char *dtbda_buffered; /* buffered output */ + dtrace_probedata_t *dtbda_probe; /* probe data */ + const dtrace_recdesc_t *dtbda_recdesc; /* record description */ + const dtrace_aggdata_t *dtbda_aggdata; /* aggregation data, if agg. 
*/ + uint32_t dtbda_flags; /* flags; see above */ +} dtrace_bufdata_t; + +typedef int dtrace_handle_buffered_f(const dtrace_bufdata_t *, void *); +extern int dtrace_handle_buffered(dtrace_hdl_t *, + dtrace_handle_buffered_f *, void *); + +typedef struct dtrace_setoptdata { + dtrace_hdl_t *dtsda_handle; /* handle to DTrace library */ + const dtrace_probedata_t *dtsda_probe; /* probe data */ + const char *dtsda_option; /* option that was set */ + dtrace_optval_t dtsda_oldval; /* old value */ + dtrace_optval_t dtsda_newval; /* new value */ +} dtrace_setoptdata_t; + +typedef int dtrace_handle_setopt_f(const dtrace_setoptdata_t *, void *); +extern int dtrace_handle_setopt(dtrace_hdl_t *, + dtrace_handle_setopt_f *, void *); + +/* + * DTrace Aggregate Interface + */ + +#define DTRACE_A_PERCPU 0x0001 +#define DTRACE_A_KEEPDELTA 0x0002 +#define DTRACE_A_ANONYMOUS 0x0004 +#define DTRACE_A_TOTAL 0x0008 +#define DTRACE_A_MINMAXBIN 0x0010 +#define DTRACE_A_HASNEGATIVES 0x0020 +#define DTRACE_A_HASPOSITIVES 0x0040 + +#define DTRACE_AGGZOOM_MAX 0.95 /* height of max bar */ + +#define DTRACE_AGGWALK_ERROR -1 /* error while processing */ +#define DTRACE_AGGWALK_NEXT 0 /* proceed to next element */ +#define DTRACE_AGGWALK_ABORT 1 /* abort aggregation walk */ +#define DTRACE_AGGWALK_CLEAR 2 /* clear this element */ +#define DTRACE_AGGWALK_NORMALIZE 3 /* normalize this element */ +#define DTRACE_AGGWALK_DENORMALIZE 4 /* denormalize this element */ +#define DTRACE_AGGWALK_REMOVE 5 /* remove this element */ + +struct dtrace_aggdata { + dtrace_hdl_t *dtada_handle; /* handle to DTrace library */ + dtrace_aggdesc_t *dtada_desc; /* aggregation description */ + dtrace_eprobedesc_t *dtada_edesc; /* enabled probe description */ + dtrace_probedesc_t *dtada_pdesc; /* probe description */ + caddr_t dtada_data; /* pointer to raw data */ + uint64_t dtada_normal; /* the normal -- 1 for denorm */ + size_t dtada_size; /* total size of the data */ + caddr_t dtada_delta; /* delta data, if available */ 
+ caddr_t *dtada_percpu; /* per CPU data, if avail */ + caddr_t *dtada_percpu_delta; /* per CPU delta, if avail */ + int64_t dtada_total; /* per agg total, if avail */ + uint16_t dtada_minbin; /* minimum bin, if avail */ + uint16_t dtada_maxbin; /* maximum bin, if avail */ + uint32_t dtada_flags; /* flags */ +}; + +typedef int dtrace_aggregate_f(const dtrace_aggdata_t *, void *); +typedef int dtrace_aggregate_walk_f(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); +typedef int dtrace_aggregate_walk_joined_f(const dtrace_aggdata_t **, + const int, void *); + +extern void dtrace_aggregate_clear(dtrace_hdl_t *); +extern int dtrace_aggregate_snap(dtrace_hdl_t *); +extern int dtrace_aggregate_print(dtrace_hdl_t *, FILE *, + dtrace_aggregate_walk_f *); + +extern int dtrace_aggregate_walk(dtrace_hdl_t *, dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_joined(dtrace_hdl_t *, + dtrace_aggvarid_t *, int, dtrace_aggregate_walk_joined_f *, void *); + +extern int dtrace_aggregate_walk_sorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_keysorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_valsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +extern int dtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *, + dtrace_aggregate_f *, void *); + +#define DTRACE_AGD_PRINTED 0x1 /* aggregation printed in program */ + +/* + * DTrace Process Control Interface + * + * Library clients who wish to 
have libdtrace create or grab processes for + * monitoring of their symbol table changes may use these interfaces to + * request that libdtrace obtain control of the process using libproc. + */ + +extern struct ps_prochandle *dtrace_proc_create(dtrace_hdl_t *, + const char *, char *const *, proc_child_func *, void *); + +extern struct ps_prochandle *dtrace_proc_grab(dtrace_hdl_t *, pid_t, int); +extern void dtrace_proc_release(dtrace_hdl_t *, struct ps_prochandle *); +extern void dtrace_proc_continue(dtrace_hdl_t *, struct ps_prochandle *); + +/* + * DTrace Object, Symbol, and Type Interfaces + * + * Library clients can use libdtrace to perform symbol and C type information + * lookups by symbol name, symbol address, or C type name, or to look up meta- + * information cached for each of the program objects in use by DTrace. The + * resulting structs contain pointers to arbitrary-length strings, including + * object, symbol, and type names, that are persistent until the next call to + * dtrace_update(). Once dtrace_update() is called, any cached values must + * be flushed and not used subsequently by the client program. 
+ */ + +#define DTRACE_OBJ_EXEC ((const char *)0L) /* primary executable file */ +#define DTRACE_OBJ_RTLD ((const char *)1L) /* run-time link-editor */ +#define DTRACE_OBJ_CDEFS ((const char *)2L) /* C include definitions */ +#define DTRACE_OBJ_DDEFS ((const char *)3L) /* D program definitions */ +#define DTRACE_OBJ_EVERY ((const char *)-1L) /* all known objects */ +#define DTRACE_OBJ_KMODS ((const char *)-2L) /* all kernel objects */ +#define DTRACE_OBJ_UMODS ((const char *)-3L) /* all user objects */ + +typedef struct dtrace_objinfo { + const char *dto_name; /* object file scope name */ + const char *dto_file; /* object file path (if any) */ + int dto_id; /* object file id (if any) */ + uint_t dto_flags; /* object flags (see below) */ + GElf_Addr dto_text_va; /* address of text section */ + GElf_Xword dto_text_size; /* size of text section */ + GElf_Addr dto_data_va; /* address of data section */ + GElf_Xword dto_data_size; /* size of data section */ + GElf_Addr dto_bss_va; /* address of BSS */ + GElf_Xword dto_bss_size; /* size of BSS */ +} dtrace_objinfo_t; + +#define DTRACE_OBJ_F_KERNEL 0x1 /* object is a kernel module */ +#define DTRACE_OBJ_F_PRIMARY 0x2 /* object is a primary module */ + +typedef int dtrace_obj_f(dtrace_hdl_t *, const dtrace_objinfo_t *, void *); + +extern int dtrace_object_iter(dtrace_hdl_t *, dtrace_obj_f *, void *); +extern int dtrace_object_info(dtrace_hdl_t *, const char *, dtrace_objinfo_t *); + +typedef struct dtrace_syminfo { + const char *dts_object; /* object name */ + const char *dts_name; /* symbol name */ + ulong_t dts_id; /* symbol id */ +} dtrace_syminfo_t; + +extern int dtrace_lookup_by_name(dtrace_hdl_t *, const char *, const char *, + GElf_Sym *, dtrace_syminfo_t *); + +extern int dtrace_lookup_by_addr(dtrace_hdl_t *, GElf_Addr addr, + GElf_Sym *, dtrace_syminfo_t *); + +typedef struct dtrace_typeinfo { + const char *dtt_object; /* object containing type */ + ctf_file_t *dtt_ctfp; /* CTF container handle */ + ctf_id_t 
dtt_type; /* CTF type identifier */ + uint_t dtt_flags; /* Misc. flags */ +} dtrace_typeinfo_t; + +#define DTT_FL_USER 0x1 /* user type */ + +extern int dtrace_lookup_by_type(dtrace_hdl_t *, const char *, const char *, + dtrace_typeinfo_t *); + +extern int dtrace_symbol_type(dtrace_hdl_t *, const GElf_Sym *, + const dtrace_syminfo_t *, dtrace_typeinfo_t *); + +extern int dtrace_type_strcompile(dtrace_hdl_t *, + const char *, dtrace_typeinfo_t *); + +extern int dtrace_type_fcompile(dtrace_hdl_t *, + FILE *, dtrace_typeinfo_t *); + +/* + * DTrace Probe Interface + * + * Library clients can use these functions to iterate over the set of available + * probe definitions and inquire as to their attributes. The probe iteration + * interfaces report probes that are declared as well as those from dtrace(7D). + */ +typedef struct dtrace_probeinfo { + dtrace_attribute_t dtp_attr; /* name attributes */ + dtrace_attribute_t dtp_arga; /* arg attributes */ + const dtrace_typeinfo_t *dtp_argv; /* arg types */ + int dtp_argc; /* arg count */ +} dtrace_probeinfo_t; + +typedef int dtrace_probe_f(dtrace_hdl_t *, const dtrace_probedesc_t *, void *); + +extern int dtrace_probe_iter(dtrace_hdl_t *, + const dtrace_probedesc_t *pdp, dtrace_probe_f *, void *); + +extern int dtrace_probe_info(dtrace_hdl_t *, + const dtrace_probedesc_t *, dtrace_probeinfo_t *); + +/* + * DTrace Vector Interface + * + * The DTrace library normally speaks directly to dtrace(7D). However, + * this communication may be vectored elsewhere. Consumers who wish to + * perform a vectored open must fill in the vector, and use the dtrace_vopen() + * entry point to obtain a library handle. 
+ */ +struct dtrace_vector { +#if defined(sun) + int (*dtv_ioctl)(void *, int, void *); +#else + int (*dtv_ioctl)(void *, u_long, void *); +#endif + int (*dtv_lookup_by_addr)(void *, GElf_Addr, GElf_Sym *, + dtrace_syminfo_t *); + int (*dtv_status)(void *, processorid_t); + long (*dtv_sysconf)(void *, int); +}; + +/* + * DTrace Utility Functions + * + * Library clients can use these functions to convert addresses to strings, to + * convert between string and integer probe descriptions and the + * dtrace_probedesc_t representation, and to perform similar conversions on + * stability attributes. + */ +extern int dtrace_addr2str(dtrace_hdl_t *, uint64_t, char *, int); +extern int dtrace_uaddr2str(dtrace_hdl_t *, pid_t, uint64_t, char *, int); + +extern int dtrace_xstr2desc(dtrace_hdl_t *, dtrace_probespec_t, + const char *, int, char *const [], dtrace_probedesc_t *); + +extern int dtrace_str2desc(dtrace_hdl_t *, dtrace_probespec_t, + const char *, dtrace_probedesc_t *); + +extern int dtrace_id2desc(dtrace_hdl_t *, dtrace_id_t, dtrace_probedesc_t *); + +#define DTRACE_DESC2STR_MAX 1024 /* min buf size for dtrace_desc2str() */ + +extern char *dtrace_desc2str(const dtrace_probedesc_t *, char *, size_t); + +#define DTRACE_ATTR2STR_MAX 64 /* min buf size for dtrace_attr2str() */ + +extern char *dtrace_attr2str(dtrace_attribute_t, char *, size_t); +extern int dtrace_str2attr(const char *, dtrace_attribute_t *); + +extern const char *dtrace_stability_name(dtrace_stability_t); +extern const char *dtrace_class_name(dtrace_class_t); + +extern int dtrace_provider_modules(dtrace_hdl_t *, const char **, int); + +extern const char *const _dtrace_version; +extern int _dtrace_debug; + +#ifdef __cplusplus +} +#endif + +#if !defined(sun) +#define _SC_CPUID_MAX _SC_NPROCESSORS_CONF +#define _SC_NPROCESSORS_MAX _SC_NPROCESSORS_CONF +#endif + +#endif /* _DTRACE_H */ diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/mkerrno.sh 
b/cddl/contrib/opensolaris/lib/libdtrace/common/mkerrno.sh new file mode 100755 index 0000000..50b7f1c --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/mkerrno.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# The banner below relies on echo expanding \n and \t. A BSD echo only does +# so when given -e, so pass it the same way mkerrtags.sh and mknames.sh do. +BSDECHO=-e + +echo ${BSDECHO} "\ +/*\n\ + * Copyright 2003 Sun Microsystems, Inc. All rights reserved.\n\ + * Use is subject to license terms.\n\ + */\n\ +\n\ +#pragma ident\t\"%Z%%M%\t%I%\t%E% SMI\"\n" + +# Rewrite each "#define Exxx value" line read from stdin as a D inline +# definition followed by its binding pragma. '@' is a placeholder that tr +# turns into the newline separating the two output lines. +pattern='^#define[ ]\(E[A-Z0-9]*\)[ ]*\([A-Z0-9]*\).*$' +replace='inline int \1 = \2;@#pragma D binding "1.0" \1' + +sed -n "s/$pattern/$replace/p" | tr '@' '\n' diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/mkerrtags.sh b/cddl/contrib/opensolaris/lib/libdtrace/common/mkerrtags.sh new file mode 100644 index 0000000..d5651ff --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/mkerrtags.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). 
You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +BSDECHO=-e + +echo ${BSDECHO} "\ +/*\n\ + * Copyright 2003 Sun Microsystems, Inc. All rights reserved.\n\ + * Use is subject to license terms.\n\ + */\n\ +\n\ +#pragma ident\t\"%Z%%M%\t%I%\t%E% SMI\"\n\ +\n\ +#include <dt_errtags.h> +\n\ +static const char *const _dt_errtags[] = {" + +pattern='^ \(D_[A-Z0-9_]*\),*' +replace=' "\1",' + +sed -n "s/$pattern/$replace/p" || exit 1 + +echo ${BSDECHO} "\ +};\n\ +\n\ +static const int _dt_ntag = sizeof (_dt_errtags) / sizeof (_dt_errtags[0]);\n\ +\n\ +const char * +dt_errtag(dt_errtag_t tag) +{ + return (_dt_errtags[(tag > 0 && tag < _dt_ntag) ? tag : 0]); +}" + +exit 0 diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/mknames.sh b/cddl/contrib/opensolaris/lib/libdtrace/common/mknames.sh new file mode 100644 index 0000000..2fdc2fa --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/mknames.sh @@ -0,0 +1,55 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +BSDECHO=-e + +echo ${BSDECHO} "\ +/*\n\ + * Copyright 2005 Sun Microsystems, Inc. All rights reserved.\n\ + * Use is subject to license terms.\n\ + */\n\ +\n\ +#pragma ident\t\"%Z%%M%\t%I%\t%E% SMI\"\n\ +\n\ +#include <dtrace.h>\n\ +\n\ +/*ARGSUSED*/ +const char *\n\ +dtrace_subrstr(dtrace_hdl_t *dtp, int subr)\n\ +{\n\ + switch (subr) {" + +nawk ' +/^#define[ ]*DIF_SUBR_/ && $2 != "DIF_SUBR_MAX" { + printf("\tcase %s: return (\"%s\");\n", $2, tolower(substr($2, 10))); +}' + +echo ${BSDECHO} "\ + default: return (\"unknown\");\n\ + }\n\ +}" diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/mksignal.sh b/cddl/contrib/opensolaris/lib/libdtrace/common/mksignal.sh new file mode 100755 index 0000000..1bffa64 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/mksignal.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# The banner below relies on echo expanding \n and \t. A BSD echo only does +# so when given -e, so pass it the same way mkerrtags.sh and mknames.sh do. +BSDECHO=-e + +echo ${BSDECHO} "\ +/*\n\ + * Copyright 2003 Sun Microsystems, Inc. All rights reserved.\n\ + * Use is subject to license terms.\n\ + */\n\ +\n\ +#pragma ident\t\"%Z%%M%\t%I%\t%E% SMI\"\n" + +# Rewrite each "#define SIGxxx value" line read from stdin (leading +# underscores on the name are dropped) as a D inline definition followed by +# its binding pragma; sed stops reading at the first line mentioning SIGRTMAX. +# '@' is a placeholder that tr turns into a newline. +pattern='^#define[ ]*_*\(SIG[A-Z0-9]*\)[ ]\{1,\}\([A-Z0-9]*\).*$' +replace='inline int \1 = \2;@#pragma D binding "1.0" \1' + +sed -n "s/$pattern/$replace/p;/SIGRTMAX/q" | tr '@' '\n' diff --git a/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c b/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c new file mode 100644 index 0000000..9479e83 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c @@ -0,0 +1,537 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <stdlib.h> +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <libgen.h> + +#include <dt_impl.h> +#include <dt_pid.h> + +#include <dis_tables.h> + +#if !defined(sun) +#define PR_MODEL_ILP32 1 +#define PR_MODEL_LP64 2 +#include <libproc_compat.h> +#endif + +#define DT_POPL_EBP 0x5d +#define DT_RET 0xc3 +#define DT_RET16 0xc2 +#define DT_LEAVE 0xc9 +#define DT_JMP32 0xe9 +#define DT_JMP8 0xeb +#define DT_REP 0xf3 + +#define DT_MOVL_EBP_ESP 0xe58b + +#define DT_ISJ32(op16) (((op16) & 0xfff0) == 0x0f80) +#define DT_ISJ8(op8) (((op8) & 0xf0) == 0x70) + +#define DT_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) + +static int dt_instr_size(uchar_t *, dtrace_hdl_t *, pid_t, uintptr_t, char); + +/* + * Fill in ftp as an entry probe spanning the symbol (one tracepoint, at + * offset 0) and submit it with the FASTTRAPIOC_MAKEPROBE ioctl. Returns 1 + * on success, or the result of dt_set_errno() if the ioctl fails. + */ +/*ARGSUSED*/ +int +dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) +{ + ftp->ftps_type = DTFTP_ENTRY; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 1; + ftp->ftps_offs[0] = 0; + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + /* + * Capture errno first: dt_dprintf() and strerror() are free + * to modify it before dt_set_errno() reads it. + */ + int err = errno; + + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(err)); + return (dt_set_errno(dtp, err)); + } + + return (1); +} + +static int +dt_pid_has_jump_table(struct ps_prochandle *P, dtrace_hdl_t *dtp, + uint8_t *text, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) +{ + ulong_t i; + int size; +#if defined(sun) + pid_t pid = Pstatus(P)->pr_pid; + char dmodel = Pstatus(P)->pr_dmodel; +#else + pid_t pid = proc_getpid(P); +#if __i386__ + char dmodel = 
PR_MODEL_ILP32; +#elif __amd64__ + char dmodel = PR_MODEL_LP64; +#endif +#endif + + /* + * Take a pass through the function looking for a register-dependent + * jmp instruction. This could be a jump table so we have to be + * ultra conservative. + */ + for (i = 0; i < ftp->ftps_size; i += size) { + size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, + dmodel); + + /* + * Assume the worst if we hit an illegal instruction. + */ + if (size <= 0) { + dt_dprintf("error at %#lx (assuming jump table)\n", i); + return (1); + } + + /* + * The check below is compiled out unless "notyet" is defined; + * until then only a failed decode makes this function return 1. + */ +#ifdef notyet + /* + * Register-dependent jmp instructions start with a 0xff byte + * and have the modrm.reg field set to 4. They can have an + * optional REX prefix on the 64-bit ISA. + */ + if ((text[i] == 0xff && DT_MODRM_REG(text[i + 1]) == 4) || + (dmodel == PR_MODEL_LP64 && (text[i] & 0xf0) == 0x40 && + text[i + 1] == 0xff && DT_MODRM_REG(text[i + 2]) == 4)) { + dt_dprintf("found a suspected jump table at %s:%lx\n", + ftp->ftps_func, i); + return (1); + } +#endif + } + + return (0); +} + +/*ARGSUSED*/ +int +dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) +{ + uint8_t *text; + ulong_t i, end; + int size; +#if defined(sun) + pid_t pid = Pstatus(P)->pr_pid; + char dmodel = Pstatus(P)->pr_dmodel; +#else + pid_t pid = proc_getpid(P); +#if __i386__ + char dmodel = PR_MODEL_ILP32; +#elif __amd64__ + char dmodel = PR_MODEL_LP64; +#endif +#endif + + /* + * We allocate a few extra bytes at the end so we don't have to check + * for overrunning the buffer. 
+ */ + if ((text = calloc(1, symp->st_size + 4)) == NULL) { + dt_dprintf("mr sparkle: malloc() failed\n"); + return (DT_PROC_ERR); + } + + if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { + dt_dprintf("mr sparkle: Pread() failed\n"); + free(text); + return (DT_PROC_ERR); + } + + ftp->ftps_type = DTFTP_RETURN; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 0; + + /* + * If there's a jump table in the function we're only willing to + * instrument these specific (and equivalent) instruction sequences: + * leave + * [rep] ret + * and + * movl %ebp,%esp + * popl %ebp + * [rep] ret + * + * We do this to avoid accidentally interpreting jump table + * offsets as actual instructions. + */ + if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { + for (i = 0, end = ftp->ftps_size; i < end; i += size) { + size = dt_instr_size(&text[i], dtp, pid, + symp->st_value + i, dmodel); + + /* bail if we hit an invalid opcode */ + if (size <= 0) + break; + + if (text[i] == DT_LEAVE && text[i + 1] == DT_RET) { + dt_dprintf("leave/ret at %lx\n", i + 1); + ftp->ftps_offs[ftp->ftps_noffs++] = i + 1; + size = 2; + } else if (text[i] == DT_LEAVE && + text[i + 1] == DT_REP && text[i + 2] == DT_RET) { + dt_dprintf("leave/rep ret at %lx\n", i + 1); + ftp->ftps_offs[ftp->ftps_noffs++] = i + 1; + size = 3; + } else if (*(uint16_t *)&text[i] == DT_MOVL_EBP_ESP && + text[i + 2] == DT_POPL_EBP && + text[i + 3] == DT_RET) { + dt_dprintf("movl/popl/ret at %lx\n", i + 3); + ftp->ftps_offs[ftp->ftps_noffs++] = i + 3; + size = 4; + } else if (*(uint16_t *)&text[i] == DT_MOVL_EBP_ESP && + text[i + 2] == DT_POPL_EBP && + text[i + 3] == DT_REP && + text[i + 4] == DT_RET) { + dt_dprintf("movl/popl/rep ret at %lx\n", i + 3); + ftp->ftps_offs[ftp->ftps_noffs++] = i + 3; + size = 5; + } + } + } else { + for (i = 0, end = ftp->ftps_size; i < end; i += size) { + size = dt_instr_size(&text[i], dtp, pid, + symp->st_value + i, dmodel); + + 
/* bail if we hit an invalid opcode */ + if (size <= 0) + break; + + /* ordinary ret */ + if (size == 1 && text[i] == DT_RET) + goto is_ret; + + /* two-byte ret */ + if (size == 2 && text[i] == DT_REP && + text[i + 1] == DT_RET) + goto is_ret; + + /* ret <imm16> */ + if (size == 3 && text[i] == DT_RET16) + goto is_ret; + + /* two-byte ret <imm16> */ + if (size == 4 && text[i] == DT_REP && + text[i + 1] == DT_RET16) + goto is_ret; + + /* 32-bit displacement jmp outside of the function */ + if (size == 5 && text[i] == DT_JMP32 && symp->st_size <= + (uintptr_t)(i + size + *(int32_t *)&text[i + 1])) + goto is_ret; + + /* 8-bit displacement jmp outside of the function */ + if (size == 2 && text[i] == DT_JMP8 && symp->st_size <= + (uintptr_t)(i + size + *(int8_t *)&text[i + 1])) + goto is_ret; + + /* 32-bit disp. conditional jmp outside of the func. */ + if (size == 6 && DT_ISJ32(*(uint16_t *)&text[i]) && + symp->st_size <= + (uintptr_t)(i + size + *(int32_t *)&text[i + 2])) + goto is_ret; + + /* 8-bit disp. conditional jmp outside of the func. 
*/ + if (size == 2 && DT_ISJ8(text[i]) && symp->st_size <= + (uintptr_t)(i + size + *(int8_t *)&text[i + 1])) + goto is_ret; + + continue; +is_ret: + dt_dprintf("return at offset %lx\n", i); + ftp->ftps_offs[ftp->ftps_noffs++] = i; + } + } + + free(text); + if (ftp->ftps_noffs > 0) { + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + /* + * Capture errno first: dt_dprintf() and strerror() + * are free to modify it before dt_set_errno() reads + * it. + */ + int err = errno; + + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(err)); + return (dt_set_errno(dtp, err)); + } + } + + return (ftp->ftps_noffs); +} + +/* + * Create a probe at the single offset "off" within the symbol. When + * ftps_func is "-" the offset is used as given; otherwise the function text + * is read and decoded one instruction at a time to confirm that "off" falls + * on an instruction boundary. Returns ftps_noffs (1) on success, 0 if a + * jump table is suspected, DT_PROC_ERR if the text cannot be allocated or + * read, DT_PROC_ALIGN if "off" is not on a boundary or decoding fails, or + * the result of dt_set_errno() if the ioctl fails. + */ +/*ARGSUSED*/ +int +dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) +{ + ftp->ftps_type = DTFTP_OFFSETS; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 1; + + if (strcmp("-", ftp->ftps_func) == 0) { + ftp->ftps_offs[0] = off; + } else { + uint8_t *text; + ulong_t i; + int size; +#if defined(sun) + pid_t pid = Pstatus(P)->pr_pid; + char dmodel = Pstatus(P)->pr_dmodel; +#else + pid_t pid = proc_getpid(P); +#if __i386__ + char dmodel = PR_MODEL_ILP32; +#elif __amd64__ + char dmodel = PR_MODEL_LP64; +#endif +#endif + + if ((text = malloc(symp->st_size)) == NULL) { + dt_dprintf("mr sparkle: malloc() failed\n"); + return (DT_PROC_ERR); + } + + if (Pread(P, text, symp->st_size, symp->st_value) != + symp->st_size) { + dt_dprintf("mr sparkle: Pread() failed\n"); + free(text); + return (DT_PROC_ERR); + } + + /* + * We can't instrument offsets in functions with jump tables + * as we might interpret a jump table offset as an + * instruction. + */ + if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { + free(text); + return (0); + } + + for (i = 0; i < symp->st_size; i += size) { + if (i == off) { + ftp->ftps_offs[0] = i; + break; + } + + /* + * If we've passed the desired offset without a + * match, then the given offset must not lie on an 
+ */ + if (i > off) { + free(text); + return (DT_PROC_ALIGN); + } + + size = dt_instr_size(&text[i], dtp, pid, + symp->st_value + i, dmodel); + + /* + * If we hit an invalid instruction, bail as if we + * couldn't find the offset. + */ + if (size <= 0) { + free(text); + return (DT_PROC_ALIGN); + } + } + + free(text); + } + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (ftp->ftps_noffs); +} + +/*ARGSUSED*/ +int +dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) +{ + uint8_t *text; + int size; + ulong_t i, end = symp->st_size; +#if defined(sun) + pid_t pid = Pstatus(P)->pr_pid; + char dmodel = Pstatus(P)->pr_dmodel; +#else + pid_t pid = proc_getpid(P); +#if __i386__ + char dmodel = PR_MODEL_ILP32; +#elif __amd64__ + char dmodel = PR_MODEL_LP64; +#endif +#endif + + ftp->ftps_type = DTFTP_OFFSETS; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 0; + + if ((text = malloc(symp->st_size)) == NULL) { + dt_dprintf("mr sparkle: malloc() failed\n"); + return (DT_PROC_ERR); + } + + if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { + dt_dprintf("mr sparkle: Pread() failed\n"); + free(text); + return (DT_PROC_ERR); + } + + /* + * We can't instrument offsets in functions with jump tables as + * we might interpret a jump table offset as an instruction. 
+ */ + if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { + free(text); + return (0); + } + + if (strcmp("*", pattern) == 0) { + for (i = 0; i < end; i += size) { + ftp->ftps_offs[ftp->ftps_noffs++] = i; + + size = dt_instr_size(&text[i], dtp, pid, + symp->st_value + i, dmodel); + + /* bail if we hit an invalid opcode */ + if (size <= 0) + break; + } + } else { + char name[sizeof (i) * 2 + 1]; + + for (i = 0; i < end; i += size) { + (void) snprintf(name, sizeof (name), "%lx", i); + if (gmatch(name, pattern)) + ftp->ftps_offs[ftp->ftps_noffs++] = i; + + size = dt_instr_size(&text[i], dtp, pid, + symp->st_value + i, dmodel); + + /* bail if we hit an invalid opcode */ + if (size <= 0) + break; + } + } + + free(text); + if (ftp->ftps_noffs > 0) { + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + } + + return (ftp->ftps_noffs); +} + +typedef struct dtrace_dis { + uchar_t *instr; + dtrace_hdl_t *dtp; + pid_t pid; + uintptr_t addr; +} dtrace_dis_t; + +static int +dt_getbyte(void *data) +{ + dtrace_dis_t *dis = data; + int ret = *dis->instr; + + if (ret == FASTTRAP_INSTR) { + fasttrap_instr_query_t instr; + + instr.ftiq_pid = dis->pid; + instr.ftiq_pc = dis->addr; + + /* + * If we hit a byte that looks like the fasttrap provider's + * trap instruction (which doubles as the breakpoint + * instruction for debuggers) we need to query the kernel + * for the real value. This may just be part of an immediate + * value so there's no need to return an error if the + * kernel doesn't know about this address. 
+ */ + if (ioctl(dis->dtp->dt_ftfd, FASTTRAPIOC_GETINSTR, &instr) == 0) + ret = instr.ftiq_instr; + } + + dis->addr++; + dis->instr++; + + return (ret); +} + +static int +dt_instr_size(uchar_t *instr, dtrace_hdl_t *dtp, pid_t pid, uintptr_t addr, + char dmodel) +{ + dtrace_dis_t data; + dis86_t x86dis; + uint_t cpu_mode; + + data.instr = instr; + data.dtp = dtp; + data.pid = pid; + data.addr = addr; + + x86dis.d86_data = &data; + x86dis.d86_get_byte = dt_getbyte; + x86dis.d86_check_func = NULL; + + cpu_mode = (dmodel == PR_MODEL_ILP32) ? SIZE32 : SIZE64; + + if (dtrace_disx86(&x86dis, cpu_mode) != 0) + return (-1); + + /* + * If the instruction was a single-byte breakpoint, there may be + * another debugger attached to this process. The original instruction + * can't be recovered so this must fail. + */ + if (x86dis.d86_len == 1 && + (uchar_t)x86dis.d86_bytes[0] == FASTTRAP_INSTR) + return (-1); + + return (x86dis.d86_len); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/i386/regs.d.in b/cddl/contrib/opensolaris/lib/libdtrace/i386/regs.d.in new file mode 100644 index 0000000..3328f33 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/i386/regs.d.in @@ -0,0 +1,117 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +inline int R_GS = @GS@; +#pragma D binding "1.0" R_GS +inline int R_FS = @FS@; +#pragma D binding "1.0" R_FS +inline int R_ES = @ES@; +#pragma D binding "1.0" R_ES +inline int R_DS = @DS@; +#pragma D binding "1.0" R_DS + +inline int R_EDI = @EDI@; +#pragma D binding "1.0" R_EDI +inline int R_ESI = @ESI@; +#pragma D binding "1.0" R_ESI +inline int R_EBP = @EBP@; +#pragma D binding "1.0" R_EBP +inline int R_ESP = @ESP@; +#pragma D binding "1.0" R_ESP +inline int R_EBX = @EBX@; +#pragma D binding "1.0" R_EBX +inline int R_EDX = @EDX@; +#pragma D binding "1.0" R_EDX +inline int R_ECX = @ECX@; +#pragma D binding "1.0" R_ECX +inline int R_EAX = @EAX@; +#pragma D binding "1.0" R_EAX + +inline int R_TRAPNO = @TRAPNO@; +#pragma D binding "1.0" R_TRAPNO +inline int R_ERR = @ERR@; +#pragma D binding "1.0" R_ERR +inline int R_EIP = @EIP@; +#pragma D binding "1.0" R_EIP +inline int R_CS = @CS@; +#pragma D binding "1.0" R_CS +inline int R_EFL = @EFL@; +#pragma D binding "1.0" R_EFL +inline int R_UESP = @UESP@; +#pragma D binding "1.0" R_UESP +inline int R_SS = @SS@; +#pragma D binding "1.0" R_SS + +inline int R_PC = R_EIP; +#pragma D binding "1.0" R_PC +inline int R_SP = R_UESP; +#pragma D binding "1.0" R_SP +inline int R_PS = R_EFL; +#pragma D binding "1.0" R_PS +inline int R_R0 = R_EAX; +#pragma D binding "1.0" R_R0 +inline int R_R1 = R_EBX; +#pragma D binding "1.0" R_R1 + +inline int R_RSP = @REG_RSP@; +#pragma D binding "1.0" R_RSP +inline int R_RFL = @REG_RFL@; +#pragma D binding "1.0" R_RFL +inline int R_RIP = @REG_RIP@; +#pragma D binding "1.0" R_RIP +inline int R_RAX = @REG_RAX@; +#pragma 
D binding "1.0" R_RAX +inline int R_RCX = @REG_RCX@; +#pragma D binding "1.0" R_RCX +inline int R_RDX = @REG_RDX@; +#pragma D binding "1.0" R_RDX +inline int R_RBX = @REG_RBX@; +#pragma D binding "1.0" R_RBX +inline int R_RBP = @REG_RBP@; +#pragma D binding "1.0" R_RBP +inline int R_RSI = @REG_RSI@; +#pragma D binding "1.0" R_RSI +inline int R_RDI = @REG_RDI@; +#pragma D binding "1.0" R_RDI +inline int R_R8 = @REG_R8@; +#pragma D binding "1.0" R_R8 +inline int R_R9 = @REG_R9@; +#pragma D binding "1.0" R_R9 +inline int R_R10 = @REG_R10@; +#pragma D binding "1.0" R_R10 +inline int R_R11 = @REG_R11@; +#pragma D binding "1.0" R_R11 +inline int R_R12 = @REG_R12@; +#pragma D binding "1.0" R_R12 +inline int R_R13 = @REG_R13@; +#pragma D binding "1.0" R_R13 +inline int R_R14 = @REG_R14@; +#pragma D binding "1.0" R_R14 +inline int R_R15 = @REG_R15@; +#pragma D binding "1.0" R_R15 + diff --git a/cddl/contrib/opensolaris/lib/libdtrace/i386/regs.sed.in b/cddl/contrib/opensolaris/lib/libdtrace/i386/regs.sed.in new file mode 100644 index 0000000..2b2080f --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/i386/regs.sed.in @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This file is a sed script which is first preprocessed by cpp or cc -E to + * define a set of sed directives which replace #define tokens with their + * values. After preprocessing, whitespace is eliminated, and any @ symbols + * are translated into single space. The resulting sed script is then run + * over regs.d.in to replace the #define tokens listed below to create the + * finished regs.d. Refer to the rules in libdtrace/i386/Makefile for more + * information. + */ + +#include <sys/regset.h> + +#define SED_REPLACE(x) s/#x/x/g +#define SED_REPLACE64(x) s/#x/SS @+@1@+@ x/g + +SED_REPLACE(GS) +SED_REPLACE(FS) +SED_REPLACE(ES) +SED_REPLACE(DS) +SED_REPLACE(EDI) +SED_REPLACE(ESI) +SED_REPLACE(EBP) +SED_REPLACE(ESP) +SED_REPLACE(EBX) +SED_REPLACE(EDX) +SED_REPLACE(ECX) +SED_REPLACE(EAX) +SED_REPLACE(TRAPNO) +SED_REPLACE(ERR) +SED_REPLACE(EIP) +SED_REPLACE(CS) +SED_REPLACE(EFL) +SED_REPLACE(UESP) +SED_REPLACE(SS) + +SED_REPLACE64(REG_RSP) +SED_REPLACE64(REG_RFL) +SED_REPLACE64(REG_RIP) +SED_REPLACE64(REG_RAX) +SED_REPLACE64(REG_RCX) +SED_REPLACE64(REG_RDX) +SED_REPLACE64(REG_RBX) +SED_REPLACE64(REG_RBP) +SED_REPLACE64(REG_RSI) +SED_REPLACE64(REG_RDI) +SED_REPLACE64(REG_R8) +SED_REPLACE64(REG_R9) +SED_REPLACE64(REG_R10) +SED_REPLACE64(REG_R11) +SED_REPLACE64(REG_R12) +SED_REPLACE64(REG_R13) +SED_REPLACE64(REG_R14) +SED_REPLACE64(REG_R15) + diff --git a/cddl/contrib/opensolaris/lib/libdtrace/mips/dt_isadep.c b/cddl/contrib/opensolaris/lib/libdtrace/mips/dt_isadep.c new file mode 100644 index 0000000..1aeb95f --- /dev/null +++ 
b/cddl/contrib/opensolaris/lib/libdtrace/mips/dt_isadep.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdlib.h> +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <libgen.h> + +#include <dt_impl.h> +#include <dt_pid.h> + +/*ARGSUSED*/ +int +dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) +{ + /* Stub in the mips directory: no probe is created and ftp is left untouched; log the function name as unimplemented and return DT_PROC_ERR. */ + dt_dprintf("%s: unimplemented\n", __func__); + return (DT_PROC_ERR); +} + +int +dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) +{ + /* Stub: every argument (including stret) is ignored; log the function name as unimplemented and return DT_PROC_ERR. */ + dt_dprintf("%s: unimplemented\n", __func__); + return (DT_PROC_ERR); +} + +/*ARGSUSED*/ +int +dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) +{ + /* Stub: off is not validated or used; log the function name as unimplemented and return DT_PROC_ERR. */ + dt_dprintf("%s: unimplemented\n", __func__); + return (DT_PROC_ERR); +} + +/*ARGSUSED*/ +int +dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) +{ + /* Stub: pattern is never matched against anything; log the function name as unimplemented and return DT_PROC_ERR. */ + dt_dprintf("%s: unimplemented\n", __func__); + return (DT_PROC_ERR); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/powerpc/dt_isadep.c b/cddl/contrib/opensolaris/lib/libdtrace/powerpc/dt_isadep.c new file mode 100644 index 0000000..f4b02c9 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/powerpc/dt_isadep.c @@ -0,0 +1,197 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdlib.h> +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <libgen.h> + +#include <dt_impl.h> +#include <dt_pid.h> + +#include <libproc_compat.h> +/* Create an entry probe for the function described by symp: fill ftp with the function address and size and the single offset 0, then hand it to FASTTRAPIOC_MAKEPROBE. Returns 1 on success or the result of dt_set_errno() if the ioctl fails. */ +/*ARGSUSED*/ +int +dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) +{ + ftp->ftps_type = DTFTP_ENTRY; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 1; + ftp->ftps_offs[0] = 0; + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (1); +} + +/*ARGSUSED*/ +int +dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) +{ + /* Read the function text from the target with Pread and record in ftps_offs the byte offset of every return site: each blr, and each unconditional branch without link whose target lies outside [st_value, st_value + st_size) and is therefore a tail call. Returns the number of offsets recorded, DT_PROC_ERR if the text cannot be allocated or read, or the result of dt_set_errno() if the ioctl fails. stret is unused here. */ + uintptr_t temp; + uint32_t *text; + int i; + + if ((text = malloc(symp->st_size + 4)) == NULL) { + dt_dprintf("mr sparkle: malloc() failed\n"); + return (DT_PROC_ERR); + } + + if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { + dt_dprintf("mr sparkle: Pread() failed\n"); + free(text); + return (DT_PROC_ERR); + } + + /* + * Leave a dummy instruction in the last slot to simplify edge + * conditions. 
+ */ + text[symp->st_size / 4] = 0; + + ftp->ftps_type = DTFTP_RETURN; + ftp->ftps_pc = symp->st_value; + ftp->ftps_size = symp->st_size; + ftp->ftps_noffs = 0; + + for (i = 0; i < symp->st_size / 4; i++) { + /* Only b/ba (opcode 18 with the LK bit clear) and blr can be return sites. */ + if ((text[i] & 0xfc000001) != 0x48000000 && + text[i] != 0x4e800020) + continue; + + /* + * Check for a jump within this function. If it's outside this + * function then it's a tail-call, so a return point. + */ + if ((text[i] & 0xfc000000) == 0x48000000) { + temp = (text[i] & 0x03fffffc); + /* + * The displacement is a signed 26-bit value for both + * relative and absolute branches, so sign extend first. + */ + if (temp & 0x02000000) + temp |= (UINTPTR_MAX - 0x03ffffff); + /* Bit 30 denotes an absolute address. */ + if (!(text[i] & 0x02)) + temp += symp->st_value + i * 4; + /* st_value + st_size is one past the end, so it is outside. */ + if (temp >= symp->st_value && + temp < (symp->st_value + symp->st_size)) + continue; + } + dt_dprintf("return at offset %x\n", i * 4); + ftp->ftps_offs[ftp->ftps_noffs++] = i * 4; + } + + free(text); + if (ftp->ftps_noffs > 0) { + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + } + + /* No ioctl is issued when no return site was found; the count returned is then 0. */ + return (ftp->ftps_noffs); +} +/* Create a probe at byte offset off within the function. Offsets that are not a multiple of 4 are rejected with DT_PROC_ALIGN. Returns 1 on success or the result of dt_set_errno() if the ioctl fails. */ +/*ARGSUSED*/ +int +dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) +{ + if (off & 0x3) + return (DT_PROC_ALIGN); + + ftp->ftps_type = DTFTP_OFFSETS; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 1; + ftp->ftps_offs[0] = off; + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (1); +} +/* Create probes at every 4-byte offset in the function whose lower-case hexadecimal name matches pattern (every offset when pattern is exactly "*"). Returns the number of offsets recorded or the result of dt_set_errno() if the ioctl fails. */ +/*ARGSUSED*/ +int +dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) +{ + ulong_t i; + + ftp->ftps_type = DTFTP_OFFSETS; + 
ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 0; + + /* + * If we're matching against everything, just iterate through each + * instruction in the function, otherwise look for matching offset + * names by constructing the string and comparing it against the + * pattern. + */ + if (strcmp("*", pattern) == 0) { + for (i = 0; i < symp->st_size; i += 4) { + ftp->ftps_offs[ftp->ftps_noffs++] = i; + } + } else { + char name[sizeof (i) * 2 + 1]; + + for (i = 0; i < symp->st_size; i += 4) { + (void) sprintf(name, "%lx", i); + if (gmatch(name, pattern)) + ftp->ftps_offs[ftp->ftps_noffs++] = i; + } + } + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (ftp->ftps_noffs); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/sparc/dt_isadep.c b/cddl/contrib/opensolaris/lib/libdtrace/sparc/dt_isadep.c new file mode 100644 index 0000000..ed05275 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/sparc/dt_isadep.c @@ -0,0 +1,338 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdlib.h> +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <libgen.h> + +#include <dt_impl.h> +#include <dt_pid.h> + /* Bit-field extractors applied below to a 32-bit instruction word, followed by the values they are compared against. */ +#define OP(x) ((x) >> 30) +#define OP2(x) (((x) >> 22) & 0x07) +#define COND(x) (((x) >> 25) & 0x0f) +#define A(x) (((x) >> 29) & 0x01) + +#define OP_BRANCH 0 + +#define OP2_BPcc 0x1 +#define OP2_Bicc 0x2 +#define OP2_BPr 0x3 +#define OP2_FBPfcc 0x5 +#define OP2_FBfcc 0x6 + /* Create an entry probe for the function described by symp: fill ftp with the function address and size and the single offset 0, then hand it to FASTTRAPIOC_MAKEPROBE. Returns 1 on success or the result of dt_set_errno() if the ioctl fails. */ +/*ARGSUSED*/ +int +dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) +{ + ftp->ftps_type = DTFTP_ENTRY; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 1; + ftp->ftps_offs[0] = 0; + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (1); +} + +int +dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) +{ + /* Read the function text from the target with Pread, walk it one 32-bit word at a time and record in ftps_offs the byte offset of every instruction the checks below classify as a return site; if any were found, create the probe with FASTTRAPIOC_MAKEPROBE. stret[0..3] are compared against call destinations. Returns the number of offsets recorded, DT_PROC_ERR if allocation, a read or the instruction query fails, or the result of dt_set_errno() if the probe ioctl fails. */ + uint32_t *text; + int i; + int srdepth = 0; + + if ((text = malloc(symp->st_size + 4)) == NULL) { + dt_dprintf("mr sparkle: malloc() failed\n"); + return (DT_PROC_ERR); + } + + if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { + dt_dprintf("mr sparkle: Pread() failed\n"); + free(text); + return (DT_PROC_ERR); + } + + /* + * Leave a dummy instruction in the last slot to simplify edge + * conditions. 
+ */ + text[symp->st_size / 4] = 0; + + ftp->ftps_type = DTFTP_RETURN; + ftp->ftps_pc = symp->st_value; + ftp->ftps_size = symp->st_size; + ftp->ftps_noffs = 0; + + for (i = 0; i < symp->st_size / 4; i++) { + /* + * If we encounter an existing tracepoint, query the + * kernel to find out the instruction that was + * replaced at this spot. + */ + while (text[i] == FASTTRAP_INSTR) { + fasttrap_instr_query_t instr; + + instr.ftiq_pid = Pstatus(P)->pr_pid; + instr.ftiq_pc = symp->st_value + i * 4; + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_GETINSTR, + &instr) != 0) { + + if (errno == ESRCH || errno == ENOENT) { + if (Pread(P, &text[i], 4, + instr.ftiq_pc) != 4) { + dt_dprintf("mr sparkle: " + "Pread() failed\n"); + free(text); + return (DT_PROC_ERR); + } + continue; + } + + free(text); + dt_dprintf("mr sparkle: getinstr query " + "failed: %s\n", strerror(errno)); + return (DT_PROC_ERR); + } + + text[i] = instr.ftiq_instr; + break; + } + /* srdepth counts save instructions seen so far that have not been matched by a restore. */ + /* save */ + if ((text[i] & 0xc1f80000) == 0x81e00000) { + srdepth++; + continue; + } + + /* restore */ + if ((text[i] & 0xc1f80000) == 0x81e80000) { + srdepth--; + continue; + } + + if (srdepth > 0) { + /* ret */ + if (text[i] == 0x81c7e008) + goto is_ret; + + /* return */ + if (text[i] == 0x81cfe008) + goto is_ret; + + /* call or jmpl w/ restore in the slot */ + if (((text[i] & 0xc0000000) == 0x40000000 || + (text[i] & 0xc1f80000) == 0x81c00000) && + (text[i + 1] & 0xc1f80000) == 0x81e80000) + goto is_ret; + + /* call to one of the stret routines */ + if ((text[i] & 0xc0000000) == 0x40000000) { + int32_t disp = text[i] << 2; + uint64_t dest = ftp->ftps_pc + i * 4 + disp; + + dt_dprintf("dest = %llx\n", (u_longlong_t)dest); + + if (dest == stret[0] || dest == stret[1] || + dest == stret[2] || dest == stret[3]) + goto is_ret; + } + } else { + /* external call */ + if ((text[i] & 0xc0000000) == 0x40000000) { + int32_t dst = text[i] << 2; + + dst += i * 4; + + if ((uintptr_t)dst >= (uintptr_t)symp->st_size) + goto is_ret; + } + + /* jmpl 
into %g0 -- this includes the retl pseudo op */ + if ((text[i] & 0xfff80000) == 0x81c00000) + goto is_ret; + + /* external branch -- possible return site */ + if (OP(text[i]) == OP_BRANCH) { + int32_t dst; + int baa; + + switch (OP2(text[i])) { + case OP2_BPcc: + dst = text[i] & 0x7ffff; + dst <<= 13; + dst >>= 11; + /* The shift pair above turns the 19-bit word displacement into a byte displacement; NOTE(review): sign extension relies on >> of a negative int32_t being arithmetic -- confirm for the target compiler. */ + baa = COND(text[i]) == 8 && A(text[i]); + break; + case OP2_Bicc: + dst = text[i] & 0x3fffff; + dst <<= 10; + dst >>= 8; + + baa = COND(text[i]) == 8 && A(text[i]); + break; + case OP2_BPr: + dst = (((text[i]) >> 6) & 0xc000) | + ((text[i]) & 0x3fff); + dst <<= 16; + dst >>= 14; + + baa = 0; + break; + case OP2_FBPfcc: + dst = text[i] & 0x7ffff; + dst <<= 13; + dst >>= 11; + + baa = COND(text[i]) == 8 && A(text[i]); + break; + case OP2_FBfcc: + dst = text[i] & 0x3fffff; + dst <<= 10; + dst >>= 8; + + baa = COND(text[i]) == 8 && A(text[i]); + break; + default: + continue; + } + + dst += i * 4; + + /* + * Interpret branches outside of the function's + * bounds as potential return sites. If the + * branch is a ba,a don't skip the instruction + * in the delay slot. 
+ */ + if ((uintptr_t)dst >= + (uintptr_t)symp->st_size) { + if (baa) + goto is_ret_baa; + else + goto is_ret; + } + } + } + + continue; +is_ret: /* step to the following instruction so that its offset is the one recorded; the ba,a path jumps past this increment */ + i++; +is_ret_baa: + dt_dprintf("return at offset %x\n", i * 4); + ftp->ftps_offs[ftp->ftps_noffs++] = i * 4; + } + + free(text); + if (ftp->ftps_noffs > 0) { + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + } + + /* No ioctl is issued when no return site was found; the count returned is then 0. */ + return (ftp->ftps_noffs); +} + /* Create a probe at byte offset off within the function. Offsets that are not a multiple of 4 are rejected with DT_PROC_ALIGN. Returns 1 on success or the result of dt_set_errno() if the ioctl fails. */ +/*ARGSUSED*/ +int +dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) +{ + if (off & 0x3) + return (DT_PROC_ALIGN); + + ftp->ftps_type = DTFTP_OFFSETS; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 1; + ftp->ftps_offs[0] = off; + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (1); +} + /* Create probes at every 4-byte offset in the function whose lower-case hexadecimal name matches pattern (every offset when pattern is exactly "*"). Returns the number of offsets recorded or the result of dt_set_errno() if the ioctl fails. */ +/*ARGSUSED*/ +int +dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, + fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) +{ + ulong_t i; + + ftp->ftps_type = DTFTP_OFFSETS; + ftp->ftps_pc = (uintptr_t)symp->st_value; + ftp->ftps_size = (size_t)symp->st_size; + ftp->ftps_noffs = 0; + + /* + * If we're matching against everything, just iterate through each + * instruction in the function, otherwise look for matching offset + * names by constructing the string and comparing it against the + * pattern. 
+ */ + if (strcmp("*", pattern) == 0) { + for (i = 0; i < symp->st_size; i += 4) { + ftp->ftps_offs[ftp->ftps_noffs++] = i; + } + } else { + char name[sizeof (i) * 2 + 1]; + + for (i = 0; i < symp->st_size; i += 4) { + (void) sprintf(name, "%lx", i); + if (gmatch(name, pattern)) + ftp->ftps_offs[ftp->ftps_noffs++] = i; + } + } + + if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { + dt_dprintf("fasttrap probe creation ioctl failed: %s\n", + strerror(errno)); + return (dt_set_errno(dtp, errno)); + } + + return (ftp->ftps_noffs); +} diff --git a/cddl/contrib/opensolaris/lib/libdtrace/sparc/regs.d b/cddl/contrib/opensolaris/lib/libdtrace/sparc/regs.d new file mode 100644 index 0000000..7c4bc0f --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libdtrace/sparc/regs.d @@ -0,0 +1,120 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +inline int R_G0 = 0; +#pragma D binding "1.0" R_G0 +inline int R_G1 = 1; +#pragma D binding "1.0" R_G1 +inline int R_G2 = 2; +#pragma D binding "1.0" R_G2 +inline int R_G3 = 3; +#pragma D binding "1.0" R_G3 +inline int R_G4 = 4; +#pragma D binding "1.0" R_G4 +inline int R_G5 = 5; +#pragma D binding "1.0" R_G5 +inline int R_G6 = 6; +#pragma D binding "1.0" R_G6 +inline int R_G7 = 7; +#pragma D binding "1.0" R_G7 + +inline int R_O0 = 8; +#pragma D binding "1.0" R_O0 +inline int R_O1 = 9; +#pragma D binding "1.0" R_O1 +inline int R_O2 = 10; +#pragma D binding "1.0" R_O2 +inline int R_O3 = 11; +#pragma D binding "1.0" R_O3 +inline int R_O4 = 12; +#pragma D binding "1.0" R_O4 +inline int R_O5 = 13; +#pragma D binding "1.0" R_O5 +inline int R_O6 = 14; +#pragma D binding "1.0" R_O6 +inline int R_O7 = 15; +#pragma D binding "1.0" R_O7 + +inline int R_L0 = 16; +#pragma D binding "1.0" R_L0 +inline int R_L1 = 17; +#pragma D binding "1.0" R_L1 +inline int R_L2 = 18; +#pragma D binding "1.0" R_L2 +inline int R_L3 = 19; +#pragma D binding "1.0" R_L3 +inline int R_L4 = 20; +#pragma D binding "1.0" R_L4 +inline int R_L5 = 21; +#pragma D binding "1.0" R_L5 +inline int R_L6 = 22; +#pragma D binding "1.0" R_L6 +inline int R_L7 = 23; +#pragma D binding "1.0" R_L7 + +inline int R_I0 = 24; +#pragma D binding "1.0" R_I0 +inline int R_I1 = 25; +#pragma D binding "1.0" R_I1 +inline int R_I2 = 26; +#pragma D binding "1.0" R_I2 +inline int R_I3 = 27; +#pragma D binding "1.0" R_I3 +inline int R_I4 = 28; +#pragma D binding "1.0" R_I4 +inline int R_I5 = 29; +#pragma D binding "1.0" R_I5 +inline int R_I6 = 30; +#pragma D binding "1.0" R_I6 +inline int R_I7 = 31; +#pragma D binding "1.0" R_I7 + +inline int R_CCR = 32; +#pragma D binding "1.0" R_CCR +inline int R_PC = 33; +#pragma D binding "1.0" R_PC +inline int R_nPC = 34; +#pragma D binding "1.0" R_nPC +inline int R_NPC = R_nPC; +#pragma D binding "1.0" R_NPC +inline int R_Y = 35; +#pragma D 
binding "1.0" R_Y +inline int R_ASI = 36; +#pragma D binding "1.0" R_ASI +inline int R_FPRS = 37; +#pragma D binding "1.0" R_FPRS +inline int R_PS = R_CCR; +#pragma D binding "1.0" R_PS +inline int R_SP = R_O6; +#pragma D binding "1.0" R_SP +inline int R_FP = R_I6; +#pragma D binding "1.0" R_FP +inline int R_R0 = R_O0; +#pragma D binding "1.0" R_R0 +inline int R_R1 = R_O1; +#pragma D binding "1.0" R_R1 diff --git a/cddl/contrib/opensolaris/lib/libgen/common/gmatch.c b/cddl/contrib/opensolaris/lib/libgen/common/gmatch.c new file mode 100644 index 0000000..199fbb3 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libgen/common/gmatch.c @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(sun) +#pragma weak gmatch = _gmatch + +#include "gen_synonyms.h" +#endif +#include <sys/types.h> +#include <libgen.h> +#include <stdlib.h> +#include <limits.h> +#if defined(sun) +#include <widec.h> +#include "_range.h" +#else +#include <ctype.h> +/* DOODAD */ static int multibyte = 0; +#define WCHAR_CSMASK 0x30000000 +#define valid_range(c1, c2) \ + (((c1) & WCHAR_CSMASK) == ((c2) & WCHAR_CSMASK) && \ + ((c1) > 0xff || !iscntrl((int)c1)) && ((c2) > 0xff || \ + !iscntrl((int)c2))) +#endif + /* Popwchar decodes the next character of p into c with mbtowc and advances p past it. It is a statement sequence, not an expression: it uses the locals n and cl of the enclosing function and makes that function return 0 when mbtowc yields 0 or a negative value (end of pattern or undecodable bytes). */ +#define Popwchar(p, c) \ + n = mbtowc(&cl, p, MB_LEN_MAX); \ + c = cl; \ + if (n <= 0) \ + return (0); \ + p += n + /* gmatch returns nonzero when the string s matches the pattern p and 0 otherwise. One character of each is decoded per call and the remainder is matched recursively. The pattern syntax handled below is: star for any run of characters, ? for any one character, [...] for a set with an optional leading ! for negation and lo-hi ranges, and backslash to quote the next pattern character. */ +int +gmatch(const char *s, const char *p) +{ + const char *olds; + wchar_t scc, c; + int n; + wchar_t cl; + /* scc is the current string character; when mbtowc returns 0 or -1 the string pointer advances one byte and scc holds that return value instead. */ + olds = s; + n = mbtowc(&cl, s, MB_LEN_MAX); + if (n <= 0) { + s++; + scc = n; + } else { + scc = cl; + s += n; + } + n = mbtowc(&cl, p, MB_LEN_MAX); + if (n < 0) + return (0); + if (n == 0) + return (scc == 0); + p += n; + c = cl; + + switch (c) { + case '[': + if (scc <= 0) + return (0); + { + int ok; + wchar_t lc = 0; + int notflag = 0; + + ok = 0; + if (*p == '!') { + notflag = 1; + p++; + } + Popwchar(p, c); + do + { + if (c == '-' && lc && *p != ']') { + Popwchar(p, c); + if (c == '\\') { + Popwchar(p, c); + } + if (notflag) { + if (!multibyte || + valid_range(lc, c)) { + if (scc < lc || scc > c) + ok++; + else + return (0); + } + } else { + if (!multibyte || + valid_range(lc, c)) + if (lc <= scc && + scc <= c) + ok++; + } + } else if (c == '\\') { + /* skip to quoted character */ + Popwchar(p, c); + } + lc = c; + if (notflag) { + if (scc != lc) + ok++; + else + return (0); + } + else + { + if (scc == lc) + ok++; + } + Popwchar(p, c); + } while (c != ']'); + return (ok ? 
gmatch(s, p) : 0); + } + + case '\\': + /* skip to quoted character and see if it matches */ + Popwchar(p, c); + /* no break here: the quoted character is compared literally by the default case */ + default: + if (c != scc) + return (0); + /*FALLTHRU*/ + + case '?': + return (scc > 0 ? gmatch(s, p) : 0); + + case '*': + while (*p == '*') + p++; + /* a star at the very end of the pattern matches whatever is left of the string */ + if (*p == 0) + return (1); + s = olds; + while (*s) { + if (gmatch(s, p)) + return (1); + n = mbtowc(&cl, s, MB_LEN_MAX); + if (n < 0) + /* skip past illegal byte sequence */ + s++; + else + s += n; + } + return (0); + } +} diff --git a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c new file mode 100644 index 0000000..7a265bd --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c @@ -0,0 +1,1273 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include <solaris.h> +#include <inttypes.h> +#include <unistd.h> +#include <strings.h> +#include <libintl.h> +#include <stdarg.h> +#include "libnvpair.h" + +/* + * libnvpair - A tools library for manipulating <name, value> pairs. 
+ * + * This library provides routines packing an unpacking nv pairs + * for transporting data across process boundaries, transporting + * between kernel and userland, and possibly saving onto disk files. + */ + +/* + * Print control structure. + */ + +#define DEFINEOP(opname, vtype) \ + struct { \ + int (*op)(struct nvlist_prtctl *, void *, nvlist_t *, \ + const char *, vtype); \ + void *arg; \ + } opname + +#define DEFINEARROP(opname, vtype) \ + struct { \ + int (*op)(struct nvlist_prtctl *, void *, nvlist_t *, \ + const char *, vtype, uint_t); \ + void *arg; \ + } opname + +struct nvlist_printops { + DEFINEOP(print_boolean, int); + DEFINEOP(print_boolean_value, boolean_t); + DEFINEOP(print_byte, uchar_t); + DEFINEOP(print_int8, int8_t); + DEFINEOP(print_uint8, uint8_t); + DEFINEOP(print_int16, int16_t); + DEFINEOP(print_uint16, uint16_t); + DEFINEOP(print_int32, int32_t); + DEFINEOP(print_uint32, uint32_t); + DEFINEOP(print_int64, int64_t); + DEFINEOP(print_uint64, uint64_t); + DEFINEOP(print_double, double); + DEFINEOP(print_string, char *); + DEFINEOP(print_hrtime, hrtime_t); + DEFINEOP(print_nvlist, nvlist_t *); + DEFINEARROP(print_boolean_array, boolean_t *); + DEFINEARROP(print_byte_array, uchar_t *); + DEFINEARROP(print_int8_array, int8_t *); + DEFINEARROP(print_uint8_array, uint8_t *); + DEFINEARROP(print_int16_array, int16_t *); + DEFINEARROP(print_uint16_array, uint16_t *); + DEFINEARROP(print_int32_array, int32_t *); + DEFINEARROP(print_uint32_array, uint32_t *); + DEFINEARROP(print_int64_array, int64_t *); + DEFINEARROP(print_uint64_array, uint64_t *); + DEFINEARROP(print_string_array, char **); + DEFINEARROP(print_nvlist_array, nvlist_t **); +}; + +struct nvlist_prtctl { + FILE *nvprt_fp; /* output destination */ + enum nvlist_indent_mode nvprt_indent_mode; /* see above */ + int nvprt_indent; /* absolute indent, or tab depth */ + int nvprt_indentinc; /* indent or tab increment */ + const char *nvprt_nmfmt; /* member name format, max one %s */ + const 
char *nvprt_eomfmt; /* after member format, e.g. "\n" */ + const char *nvprt_btwnarrfmt; /* between array members */ + int nvprt_btwnarrfmt_nl; /* nvprt_btwnarrfmt includes newline? */ + struct nvlist_printops *nvprt_dfltops; /* default rendering ops */ + struct nvlist_printops *nvprt_custops; /* caller overrides; may be NULL */ +}; + +#define DFLTPRTOP(pctl, type) \ + ((pctl)->nvprt_dfltops->print_##type.op) + +#define DFLTPRTOPARG(pctl, type) \ + ((pctl)->nvprt_dfltops->print_##type.arg) + +#define CUSTPRTOP(pctl, type) \ + ((pctl)->nvprt_custops->print_##type.op) + +#define CUSTPRTOPARG(pctl, type) \ + ((pctl)->nvprt_custops->print_##type.arg) + +/* + * Render one single-valued member. The custom op for 'type' is tried first + * when one is set; if there is none, or it returns 0, the default op is + * used. The end-of-member format is written afterwards in either case. + */ +#define RENDER(pctl, type, nvl, name, val) \ + { \ + int done = 0; \ + if ((pctl)->nvprt_custops && CUSTPRTOP(pctl, type)) { \ + done = CUSTPRTOP(pctl, type)(pctl, \ + CUSTPRTOPARG(pctl, type), nvl, name, val); \ + } \ + if (!done) { \ + (void) DFLTPRTOP(pctl, type)(pctl, \ + DFLTPRTOPARG(pctl, type), nvl, name, val); \ + } \ + (void) fprintf(pctl->nvprt_fp, pctl->nvprt_eomfmt); \ + } + +/* + * Array counterpart of RENDER; additionally passes the element count to + * the rendering op. + */ +#define ARENDER(pctl, type, nvl, name, arrp, count) \ + { \ + int done = 0; \ + if ((pctl)->nvprt_custops && CUSTPRTOP(pctl, type)) { \ + done = CUSTPRTOP(pctl, type)(pctl, \ + CUSTPRTOPARG(pctl, type), nvl, name, arrp, count); \ + } \ + if (!done) { \ + (void) DFLTPRTOP(pctl, type)(pctl, \ + DFLTPRTOPARG(pctl, type), nvl, name, arrp, count); \ + } \ + (void) fprintf(pctl->nvprt_fp, pctl->nvprt_eomfmt); \ + } + +static void nvlist_print_with_indent(nvlist_t *, nvlist_prtctl_t); + +/* + * ====================================================================== + * | | + * | Indentation | + * | | + * ====================================================================== + */ + +/* + * Write the indentation for the current nesting level plus 'onemore' extra + * levels. In NVLIST_INDENT_ABS mode this is nvprt_indent plus onemore times + * nvprt_indentinc spaces; in NVLIST_INDENT_TABBED mode it is nvprt_indent + * plus onemore tab characters. + */ +static void +indent(nvlist_prtctl_t pctl, int onemore) +{ + int depth; + + switch (pctl->nvprt_indent_mode) { + case NVLIST_INDENT_ABS: + (void) fprintf(pctl->nvprt_fp, "%*s", + pctl->nvprt_indent + onemore * pctl->nvprt_indentinc, ""); + break; + + case NVLIST_INDENT_TABBED: + depth = pctl->nvprt_indent + onemore; + while (depth-- 
> 0) + (void) fprintf(pctl->nvprt_fp, "\t"); + } +} + +/* + * ====================================================================== + * | | + * | Default nvlist member rendering functions. | + * | | + * ====================================================================== + */ + +/* + * Generate functions to print single-valued nvlist members. + * + * type_and_variant - suffix to form function name + * vtype - C type for the member value + * ptype - C type to cast value to for printing + * vfmt - format string for pair value, e.g. "%d" or "0x%llx" + * + * Each generated function writes the indentation, the member name in the + * configured name format and then the value, and returns 1. + */ + +#define NVLIST_PRTFUNC(type_and_variant, vtype, ptype, vfmt) \ +static int \ +nvprint_##type_and_variant(nvlist_prtctl_t pctl, void *private, \ + nvlist_t *nvl, const char *name, vtype value) \ +{ \ + FILE *fp = pctl->nvprt_fp; \ + NOTE(ARGUNUSED(private)) \ + NOTE(ARGUNUSED(nvl)) \ + indent(pctl, 1); \ + (void) fprintf(fp, pctl->nvprt_nmfmt, name); \ + (void) fprintf(fp, vfmt, (ptype)value); \ + return (1); \ +} + +NVLIST_PRTFUNC(boolean, int, int, "%d") +NVLIST_PRTFUNC(boolean_value, boolean_t, int, "%d") +NVLIST_PRTFUNC(byte, uchar_t, uchar_t, "0x%2.2x") +NVLIST_PRTFUNC(int8, int8_t, int, "%d") +NVLIST_PRTFUNC(uint8, uint8_t, uint8_t, "0x%x") +NVLIST_PRTFUNC(int16, int16_t, int16_t, "%d") +NVLIST_PRTFUNC(uint16, uint16_t, uint16_t, "0x%x") +NVLIST_PRTFUNC(int32, int32_t, int32_t, "%d") +NVLIST_PRTFUNC(uint32, uint32_t, uint32_t, "0x%x") +NVLIST_PRTFUNC(int64, int64_t, longlong_t, "%lld") +NVLIST_PRTFUNC(uint64, uint64_t, u_longlong_t, "0x%llx") +/* NOTE(review): "0x%f" puts a hex prefix before a decimal value; confirm this is intended. */ +NVLIST_PRTFUNC(double, double, double, "0x%f") +NVLIST_PRTFUNC(string, char *, char *, "%s") +NVLIST_PRTFUNC(hrtime, hrtime_t, hrtime_t, "0x%llx") + +/* + * Generate functions to print array-valued nvlist members. 
+ */ + +/* + * type_and_variant - suffix to form function name + * vtype - C type of one array element + * ptype - C type to cast each element to for printing + * vfmt - format string for one element + * + * The member name is written before the first element. When the + * between-elements format contains a newline (nvprt_btwnarrfmt_nl) the name + * is written for every element, followed by "[index]: "; the index is a + * uint_t and is therefore printed with %u. Each generated function + * returns 1. + */ +#define NVLIST_ARRPRTFUNC(type_and_variant, vtype, ptype, vfmt) \ +static int \ +nvaprint_##type_and_variant(nvlist_prtctl_t pctl, void *private, \ + nvlist_t *nvl, const char *name, vtype *valuep, uint_t count) \ +{ \ + FILE *fp = pctl->nvprt_fp; \ + uint_t i; \ + NOTE(ARGUNUSED(private)) \ + NOTE(ARGUNUSED(nvl)) \ + for (i = 0; i < count; i++) { \ + if (i == 0 || pctl->nvprt_btwnarrfmt_nl) { \ + indent(pctl, 1); \ + (void) fprintf(fp, pctl->nvprt_nmfmt, name); \ + if (pctl->nvprt_btwnarrfmt_nl) \ + (void) fprintf(fp, "[%u]: ", i); \ + } \ + if (i != 0) \ + (void) fprintf(fp, pctl->nvprt_btwnarrfmt); \ + (void) fprintf(fp, vfmt, (ptype)valuep[i]); \ + } \ + return (1); \ +} + +NVLIST_ARRPRTFUNC(boolean_array, boolean_t, boolean_t, "%d") +NVLIST_ARRPRTFUNC(byte_array, uchar_t, uchar_t, "0x%2.2x") +NVLIST_ARRPRTFUNC(int8_array, int8_t, int8_t, "%d") +NVLIST_ARRPRTFUNC(uint8_array, uint8_t, uint8_t, "0x%x") +NVLIST_ARRPRTFUNC(int16_array, int16_t, int16_t, "%d") +NVLIST_ARRPRTFUNC(uint16_array, uint16_t, uint16_t, "0x%x") +NVLIST_ARRPRTFUNC(int32_array, int32_t, int32_t, "%d") +NVLIST_ARRPRTFUNC(uint32_array, uint32_t, uint32_t, "0x%x") +NVLIST_ARRPRTFUNC(int64_array, int64_t, longlong_t, "%lld") +NVLIST_ARRPRTFUNC(uint64_array, uint64_t, u_longlong_t, "0x%llx") +NVLIST_ARRPRTFUNC(string_array, char *, char *, "%s") + +/* + * Default renderer for an embedded nvlist: prints a header line, the nested + * list with the indent raised by one increment, and a trailer line. + * Returns 1. + */ +/*ARGSUSED*/ +static int +nvprint_nvlist(nvlist_prtctl_t pctl, void *private, + nvlist_t *nvl, const char *name, nvlist_t *value) +{ + FILE *fp = pctl->nvprt_fp; + + indent(pctl, 1); + (void) fprintf(fp, "%s = (embedded nvlist)\n", name); + + pctl->nvprt_indent += pctl->nvprt_indentinc; + nvlist_print_with_indent(value, pctl); + pctl->nvprt_indent -= pctl->nvprt_indentinc; + + indent(pctl, 1); + (void) fprintf(fp, "(end %s)\n", name); + + return (1); +} + +/*ARGSUSED*/ +static int +nvaprint_nvlist_array(nvlist_prtctl_t pctl, void *private, + nvlist_t *nvl, const char *name, nvlist_t **valuep, uint_t count) +{ + FILE *fp = pctl->nvprt_fp; + 
uint_t i; + + indent(pctl, 1); + (void) fprintf(fp, "%s = (array of embedded nvlists)\n", name); + + for (i = 0; i < count; i++) { + indent(pctl, 1); + /* i is a uint_t, so it is printed with %u */ + (void) fprintf(fp, "(start %s[%u])\n", name, i); + + pctl->nvprt_indent += pctl->nvprt_indentinc; + nvlist_print_with_indent(valuep[i], pctl); + pctl->nvprt_indent -= pctl->nvprt_indentinc; + + indent(pctl, 1); + (void) fprintf(fp, "(end %s[%u])\n", name, i); + } + + return (1); +} + +/* + * ====================================================================== + * | | + * | Interfaces that allow control over formatting. | + * | | + * ====================================================================== + */ + +void +nvlist_prtctl_setdest(nvlist_prtctl_t pctl, FILE *fp) +{ + pctl->nvprt_fp = fp; +} + +FILE * +nvlist_prtctl_getdest(nvlist_prtctl_t pctl) +{ + return (pctl->nvprt_fp); +} + + +/* + * Set the indent mode, starting indent and indent increment. An out-of-range + * mode falls back to NVLIST_INDENT_TABBED, a negative start to 0 and a + * negative increment to 1. + */ +void +nvlist_prtctl_setindent(nvlist_prtctl_t pctl, enum nvlist_indent_mode mode, + int start, int inc) +{ + if (mode < NVLIST_INDENT_ABS || mode > NVLIST_INDENT_TABBED) + mode = NVLIST_INDENT_TABBED; + + if (start < 0) + start = 0; + + if (inc < 0) + inc = 1; + + pctl->nvprt_indent_mode = mode; + pctl->nvprt_indent = start; + pctl->nvprt_indentinc = inc; +} + +void +nvlist_prtctl_doindent(nvlist_prtctl_t pctl, int onemore) +{ + indent(pctl, onemore); +} + + +/* + * Set one of the format strings. A NULL fmt selects the built-in default + * for that format. For NVLIST_FMT_BTWN_ARRAY it is also recorded whether + * the format contains a newline. + */ +void +nvlist_prtctl_setfmt(nvlist_prtctl_t pctl, enum nvlist_prtctl_fmt which, + const char *fmt) +{ + switch (which) { + case NVLIST_FMT_MEMBER_NAME: + if (fmt == NULL) + fmt = "%s = "; + pctl->nvprt_nmfmt = fmt; + break; + + case NVLIST_FMT_MEMBER_POSTAMBLE: + if (fmt == NULL) + fmt = "\n"; + pctl->nvprt_eomfmt = fmt; + break; + + case NVLIST_FMT_BTWN_ARRAY: + if (fmt == NULL) { + pctl->nvprt_btwnarrfmt = " "; + pctl->nvprt_btwnarrfmt_nl = 0; + } else { + pctl->nvprt_btwnarrfmt = fmt; + pctl->nvprt_btwnarrfmt_nl = (strstr(fmt, "\n") != NULL); + } + break; + + default: + break; + } +} + + +void +nvlist_prtctl_dofmt(nvlist_prtctl_t pctl, enum nvlist_prtctl_fmt which, ...) 
+{ + FILE *fp = pctl->nvprt_fp; + va_list ap; + char *name; + + va_start(ap, which); + + switch (which) { + case NVLIST_FMT_MEMBER_NAME: + /* the only format that consumes a variadic argument: the name */ + name = va_arg(ap, char *); + (void) fprintf(fp, pctl->nvprt_nmfmt, name); + break; + + case NVLIST_FMT_MEMBER_POSTAMBLE: + (void) fprintf(fp, pctl->nvprt_eomfmt); + break; + + case NVLIST_FMT_BTWN_ARRAY: + (void) fprintf(fp, pctl->nvprt_btwnarrfmt); + break; + + default: + break; + } + + va_end(ap); +} + +/* + * ====================================================================== + * | | + * | Interfaces to allow appointment of replacement rendering functions.| + * | | + * ====================================================================== + */ + +/* + * Generate nvlist_prtctlop_<type>(), which records func and its private + * argument as the custom rendering op for <type> in pctl. + */ +#define NVLIST_PRINTCTL_REPLACE(type, vtype) \ +void \ +nvlist_prtctlop_##type(nvlist_prtctl_t pctl, \ + int (*func)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype), \ + void *private) \ +{ \ + CUSTPRTOP(pctl, type) = func; \ + CUSTPRTOPARG(pctl, type) = private; \ +} + +NVLIST_PRINTCTL_REPLACE(boolean, int) +NVLIST_PRINTCTL_REPLACE(boolean_value, boolean_t) +NVLIST_PRINTCTL_REPLACE(byte, uchar_t) +NVLIST_PRINTCTL_REPLACE(int8, int8_t) +NVLIST_PRINTCTL_REPLACE(uint8, uint8_t) +NVLIST_PRINTCTL_REPLACE(int16, int16_t) +NVLIST_PRINTCTL_REPLACE(uint16, uint16_t) +NVLIST_PRINTCTL_REPLACE(int32, int32_t) +NVLIST_PRINTCTL_REPLACE(uint32, uint32_t) +NVLIST_PRINTCTL_REPLACE(int64, int64_t) +NVLIST_PRINTCTL_REPLACE(uint64, uint64_t) +NVLIST_PRINTCTL_REPLACE(double, double) +NVLIST_PRINTCTL_REPLACE(string, char *) +NVLIST_PRINTCTL_REPLACE(hrtime, hrtime_t) +NVLIST_PRINTCTL_REPLACE(nvlist, nvlist_t *) + +/* + * As NVLIST_PRINTCTL_REPLACE, for array-valued members: func takes an + * additional uint_t argument. + */ +#define NVLIST_PRINTCTL_AREPLACE(type, vtype) \ +void \ +nvlist_prtctlop_##type(nvlist_prtctl_t pctl, \ + int (*func)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, \ + uint_t), void *private) \ +{ \ + CUSTPRTOP(pctl, type) = func; \ + CUSTPRTOPARG(pctl, type) = private; \ +} + +NVLIST_PRINTCTL_AREPLACE(boolean_array, boolean_t *) +NVLIST_PRINTCTL_AREPLACE(byte_array, 
uchar_t *) +NVLIST_PRINTCTL_AREPLACE(int8_array, int8_t *) +NVLIST_PRINTCTL_AREPLACE(uint8_array, uint8_t *) +NVLIST_PRINTCTL_AREPLACE(int16_array, int16_t *) +NVLIST_PRINTCTL_AREPLACE(uint16_array, uint16_t *) +NVLIST_PRINTCTL_AREPLACE(int32_array, int32_t *) +NVLIST_PRINTCTL_AREPLACE(uint32_array, uint32_t *) +NVLIST_PRINTCTL_AREPLACE(int64_array, int64_t *) +NVLIST_PRINTCTL_AREPLACE(uint64_array, uint64_t *) +NVLIST_PRINTCTL_AREPLACE(string_array, char **) +NVLIST_PRINTCTL_AREPLACE(nvlist_array, nvlist_t **) + +/* + * ====================================================================== + * | | + * | Interfaces to manage nvlist_prtctl_t cookies. | + * | | + * ====================================================================== + */ + + +/* + * Default rendering ops, with no private argument. The initializers are + * positional, so their order must match the member order of + * struct nvlist_printops. + */ +static const struct nvlist_printops defprtops = { + { nvprint_boolean, NULL }, + { nvprint_boolean_value, NULL }, + { nvprint_byte, NULL }, + { nvprint_int8, NULL }, + { nvprint_uint8, NULL }, + { nvprint_int16, NULL }, + { nvprint_uint16, NULL }, + { nvprint_int32, NULL }, + { nvprint_uint32, NULL }, + { nvprint_int64, NULL }, + { nvprint_uint64, NULL }, + { nvprint_double, NULL }, + { nvprint_string, NULL }, + { nvprint_hrtime, NULL }, + { nvprint_nvlist, NULL }, + { nvaprint_boolean_array, NULL }, + { nvaprint_byte_array, NULL }, + { nvaprint_int8_array, NULL }, + { nvaprint_uint8_array, NULL }, + { nvaprint_int16_array, NULL }, + { nvaprint_uint16_array, NULL }, + { nvaprint_int32_array, NULL }, + { nvaprint_uint32_array, NULL }, + { nvaprint_int64_array, NULL }, + { nvaprint_uint64_array, NULL }, + { nvaprint_string_array, NULL }, + { nvaprint_nvlist_array, NULL }, +}; + +/* + * Initialise pctl with the default output settings (tabbed indent, start 0, + * increment 1, "%s = " name format, "\n" after each member, " " between + * array elements), the default ops and the supplied custom ops table, + * which may be NULL. + */ +static void +prtctl_defaults(FILE *fp, struct nvlist_prtctl *pctl, + struct nvlist_printops *ops) +{ + pctl->nvprt_fp = fp; + pctl->nvprt_indent_mode = NVLIST_INDENT_TABBED; + pctl->nvprt_indent = 0; + pctl->nvprt_indentinc = 1; + pctl->nvprt_nmfmt = "%s = "; + pctl->nvprt_eomfmt = "\n"; + pctl->nvprt_btwnarrfmt = " "; + pctl->nvprt_btwnarrfmt_nl = 0; + 
+ /* defprtops is const; the cast drops the qualifier */ + pctl->nvprt_dfltops = (struct nvlist_printops *)&defprtops; + pctl->nvprt_custops = ops; +} + +/* + * Allocate a print-control cookie set up with the default formatting, + * stdout as destination and a zeroed custom ops table. Returns NULL if + * either allocation fails. + */ +nvlist_prtctl_t +nvlist_prtctl_alloc(void) +{ + struct nvlist_prtctl *pctl; + struct nvlist_printops *ops; + + if ((pctl = malloc(sizeof (*pctl))) == NULL) + return (NULL); + + if ((ops = calloc(1, sizeof (*ops))) == NULL) { + free(pctl); + return (NULL); + } + + prtctl_defaults(stdout, pctl, ops); + + return (pctl); +} + +/* + * Free a cookie together with its custom ops table; NULL is ignored. + */ +void +nvlist_prtctl_free(nvlist_prtctl_t pctl) +{ + if (pctl != NULL) { + free(pctl->nvprt_custops); + free(pctl); + } +} + +/* + * ====================================================================== + * | | + * | Top-level print request interfaces. | + * | | + * ====================================================================== + */ + +/* + * nvlist_print_with_indent - print the version line of nvl and then every + * member, dispatching on the member type to the rendering ops of pctl. + * Does nothing if nvl is NULL. + */ +static void +nvlist_print_with_indent(nvlist_t *nvl, nvlist_prtctl_t pctl) +{ + FILE *fp = pctl->nvprt_fp; + char *name; + uint_t nelem; + nvpair_t *nvp; + + if (nvl == NULL) + return; + + indent(pctl, 0); + (void) fprintf(fp, "nvlist version: %d\n", NVL_VERSION(nvl)); + + nvp = nvlist_next_nvpair(nvl, NULL); + + while (nvp) { + data_type_t type = nvpair_type(nvp); + + name = nvpair_name(nvp); + nelem = 0; + + switch (type) { + case DATA_TYPE_BOOLEAN: { + RENDER(pctl, boolean, nvl, name, 1); + break; + } + case DATA_TYPE_BOOLEAN_VALUE: { + boolean_t val; + (void) nvpair_value_boolean_value(nvp, &val); + RENDER(pctl, boolean_value, nvl, name, val); + break; + } + case DATA_TYPE_BYTE: { + uchar_t val; + (void) nvpair_value_byte(nvp, &val); + RENDER(pctl, byte, nvl, name, val); + break; + } + case DATA_TYPE_INT8: { + int8_t val; + (void) nvpair_value_int8(nvp, &val); + RENDER(pctl, int8, nvl, name, val); + break; + } + case DATA_TYPE_UINT8: { + uint8_t val; + (void) nvpair_value_uint8(nvp, &val); + RENDER(pctl, uint8, nvl, name, val); + break; + } + case DATA_TYPE_INT16: { + int16_t val; + (void) nvpair_value_int16(nvp, &val); + RENDER(pctl, int16, nvl, 
name, val); + break; + } + case DATA_TYPE_UINT16: { + uint16_t val; + (void) nvpair_value_uint16(nvp, &val); + RENDER(pctl, uint16, nvl, name, val); + break; + } + case DATA_TYPE_INT32: { + int32_t val; + (void) nvpair_value_int32(nvp, &val); + RENDER(pctl, int32, nvl, name, val); + break; + } + case DATA_TYPE_UINT32: { + uint32_t val; + (void) nvpair_value_uint32(nvp, &val); + RENDER(pctl, uint32, nvl, name, val); + break; + } + case DATA_TYPE_INT64: { + int64_t val; + (void) nvpair_value_int64(nvp, &val); + RENDER(pctl, int64, nvl, name, val); + break; + } + case DATA_TYPE_UINT64: { + uint64_t val; + (void) nvpair_value_uint64(nvp, &val); + RENDER(pctl, uint64, nvl, name, val); + break; + } + case DATA_TYPE_DOUBLE: { + double val; + (void) nvpair_value_double(nvp, &val); + RENDER(pctl, double, nvl, name, val); + break; + } + case DATA_TYPE_STRING: { + char *val; + (void) nvpair_value_string(nvp, &val); + RENDER(pctl, string, nvl, name, val); + break; + } + case DATA_TYPE_BOOLEAN_ARRAY: { + boolean_t *val; + (void) nvpair_value_boolean_array(nvp, &val, &nelem); + ARENDER(pctl, boolean_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_BYTE_ARRAY: { + uchar_t *val; + (void) nvpair_value_byte_array(nvp, &val, &nelem); + ARENDER(pctl, byte_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_INT8_ARRAY: { + int8_t *val; + (void) nvpair_value_int8_array(nvp, &val, &nelem); + ARENDER(pctl, int8_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val; + (void) nvpair_value_uint8_array(nvp, &val, &nelem); + ARENDER(pctl, uint8_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_INT16_ARRAY: { + int16_t *val; + (void) nvpair_value_int16_array(nvp, &val, &nelem); + ARENDER(pctl, int16_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val; + (void) nvpair_value_uint16_array(nvp, &val, &nelem); + ARENDER(pctl, uint16_array, nvl, name, val, nelem); + break; + } + case 
DATA_TYPE_INT32_ARRAY: { + int32_t *val; + (void) nvpair_value_int32_array(nvp, &val, &nelem); + ARENDER(pctl, int32_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val; + (void) nvpair_value_uint32_array(nvp, &val, &nelem); + ARENDER(pctl, uint32_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_INT64_ARRAY: { + int64_t *val; + (void) nvpair_value_int64_array(nvp, &val, &nelem); + ARENDER(pctl, int64_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val; + (void) nvpair_value_uint64_array(nvp, &val, &nelem); + ARENDER(pctl, uint64_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_STRING_ARRAY: { + char **val; + (void) nvpair_value_string_array(nvp, &val, &nelem); + ARENDER(pctl, string_array, nvl, name, val, nelem); + break; + } + case DATA_TYPE_HRTIME: { + hrtime_t val; + (void) nvpair_value_hrtime(nvp, &val); + RENDER(pctl, hrtime, nvl, name, val); + break; + } + case DATA_TYPE_NVLIST: { + nvlist_t *val; + (void) nvpair_value_nvlist(nvp, &val); + RENDER(pctl, nvlist, nvl, name, val); + break; + } + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **val; + (void) nvpair_value_nvlist_array(nvp, &val, &nelem); + ARENDER(pctl, nvlist_array, nvl, name, val, nelem); + break; + } + default: + (void) fprintf(fp, " unknown data type (%d)", type); + break; + } + nvp = nvlist_next_nvpair(nvl, nvp); + } +} + +void +nvlist_print(FILE *fp, nvlist_t *nvl) +{ + struct nvlist_prtctl pc; + + prtctl_defaults(fp, &pc, NULL); + nvlist_print_with_indent(nvl, &pc); +} + +void +nvlist_prt(nvlist_t *nvl, nvlist_prtctl_t pctl) +{ + nvlist_print_with_indent(nvl, pctl); +} + +#define NVP(elem, type, vtype, ptype, format) { \ + vtype value; \ +\ + (void) nvpair_value_##type(elem, &value); \ + (void) printf("%*s%s: " format "\n", indent, "", \ + nvpair_name(elem), (ptype)value); \ +} + +#define NVPA(elem, type, vtype, ptype, format) { \ + uint_t i, count; \ + vtype *value; \ +\ + (void) 
nvpair_value_##type(elem, &value, &count); \ + for (i = 0; i < count; i++) { \ + (void) printf("%*s%s[%u]: " format "\n", indent, "", \ + nvpair_name(elem), i, (ptype)value[i]); \ + } \ +} + +/* + * Similar to nvlist_print() but handles arrays slightly differently. + * + * Output goes to stdout. Every line is preceded by 'indent' spaces, array + * elements are printed one per line as name[index], and nested nvlists are + * printed with the indent increased by four. Does nothing if list is NULL. + */ +void +dump_nvlist(nvlist_t *list, int indent) +{ + nvpair_t *elem = NULL; + boolean_t bool_value; + nvlist_t *nvlist_value; + nvlist_t **nvlist_array_value; + uint_t i, count; + + if (list == NULL) { + return; + } + + while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { + switch (nvpair_type(elem)) { + case DATA_TYPE_BOOLEAN: + (void) printf("%*s%s\n", indent, "", nvpair_name(elem)); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(elem, &bool_value); + (void) printf("%*s%s: %s\n", indent, "", + nvpair_name(elem), bool_value ? "true" : "false"); + break; + + case DATA_TYPE_BYTE: + NVP(elem, byte, uchar_t, int, "%u"); + break; + + case DATA_TYPE_INT8: + NVP(elem, int8, int8_t, int, "%d"); + break; + + case DATA_TYPE_UINT8: + NVP(elem, uint8, uint8_t, int, "%u"); + break; + + case DATA_TYPE_INT16: + NVP(elem, int16, int16_t, int, "%d"); + break; + + case DATA_TYPE_UINT16: + NVP(elem, uint16, uint16_t, int, "%u"); + break; + + case DATA_TYPE_INT32: + NVP(elem, int32, int32_t, long, "%ld"); + break; + + case DATA_TYPE_UINT32: + NVP(elem, uint32, uint32_t, ulong_t, "%lu"); + break; + + case DATA_TYPE_INT64: + NVP(elem, int64, int64_t, longlong_t, "%lld"); + break; + + case DATA_TYPE_UINT64: + NVP(elem, uint64, uint64_t, u_longlong_t, "%llu"); + break; + + case DATA_TYPE_STRING: + NVP(elem, string, char *, char *, "'%s'"); + break; + + case DATA_TYPE_BYTE_ARRAY: + NVPA(elem, byte_array, uchar_t, int, "%u"); + break; + + case DATA_TYPE_INT8_ARRAY: + NVPA(elem, int8_array, int8_t, int, "%d"); + break; + + case DATA_TYPE_UINT8_ARRAY: + NVPA(elem, uint8_array, uint8_t, int, "%u"); + break; + + case DATA_TYPE_INT16_ARRAY: + NVPA(elem, int16_array, int16_t, int, "%d"); + 
break; + + case DATA_TYPE_UINT16_ARRAY: + NVPA(elem, uint16_array, uint16_t, int, "%u"); + break; + + case DATA_TYPE_INT32_ARRAY: + NVPA(elem, int32_array, int32_t, long, "%ld"); + break; + + case DATA_TYPE_UINT32_ARRAY: + NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu"); + break; + + case DATA_TYPE_INT64_ARRAY: + NVPA(elem, int64_array, int64_t, longlong_t, "%lld"); + break; + + case DATA_TYPE_UINT64_ARRAY: + NVPA(elem, uint64_array, uint64_t, u_longlong_t, + "%llu"); + break; + + case DATA_TYPE_STRING_ARRAY: + NVPA(elem, string_array, char *, char *, "'%s'"); + break; + + case DATA_TYPE_NVLIST: + (void) nvpair_value_nvlist(elem, &nvlist_value); + (void) printf("%*s%s:\n", indent, "", + nvpair_name(elem)); + dump_nvlist(nvlist_value, indent + 4); + break; + + case DATA_TYPE_NVLIST_ARRAY: + (void) nvpair_value_nvlist_array(elem, + &nvlist_array_value, &count); + for (i = 0; i < count; i++) { + (void) printf("%*s%s[%u]:\n", indent, "", + nvpair_name(elem), i); + dump_nvlist(nvlist_array_value[i], indent + 4); + } + break; + + default: + (void) printf(dgettext(TEXT_DOMAIN, "bad config type " + "%d for %s\n"), nvpair_type(elem), + nvpair_name(elem)); + } + } +} + +/* + * ====================================================================== + * | | + * | Misc private interface. | + * | | + * ====================================================================== + */ + +/* + * Determine if string 'value' matches 'nvp' value. The 'value' string is + * converted, depending on the type of 'nvp', prior to match. For numeric + * types, a radix independent sscanf conversion of 'value' is used. If 'nvp' + * is an array type, 'ai' is the index into the array against which we are + * checking for match. If nvp is of DATA_TYPE_STRING*, the caller can pass + * in a regex_t compilation of value in 'value_regex' to trigger regular + * expression string match instead of simple strcmp(). + * + * Return 1 on match, 0 on no-match, and -1 on error. 
If the error is + * related to value syntax error and 'ep' is non-NULL, *ep will point into + * the 'value' string at the location where the error exists. + * + * NOTE: It may be possible to move the non-regex_t version of this into + * common code used by library/kernel/boot. + */ +int +nvpair_value_match_regex(nvpair_t *nvp, int ai, + char *value, regex_t *value_regex, char **ep) +{ + char *evalue; + uint_t a_len; + int sr; + + if (ep) + *ep = NULL; + + if ((nvp == NULL) || (value == NULL)) + return (-1); /* error fail match - invalid args */ + + /* make sure array and index combination make sense */ + if ((nvpair_type_is_array(nvp) && (ai < 0)) || + (!nvpair_type_is_array(nvp) && (ai >= 0))) + return (-1); /* error fail match - bad index */ + + /* non-string values should be single 'chunk' */ + if ((nvpair_type(nvp) != DATA_TYPE_STRING) && + (nvpair_type(nvp) != DATA_TYPE_STRING_ARRAY)) { + value += strspn(value, " \t"); + evalue = value + strcspn(value, " \t"); + if (*evalue) { + if (ep) + *ep = evalue; + return (-1); /* error fail match - syntax */ + } + } + + sr = EOF; + switch (nvpair_type(nvp)) { + case DATA_TYPE_STRING: { + char *val; + + /* check string value for match */ + if (nvpair_value_string(nvp, &val) == 0) { + if (value_regex) { + if (regexec(value_regex, val, + (size_t)0, NULL, 0) == 0) + return (1); /* match */ + } else { + if (strcmp(value, val) == 0) + return (1); /* match */ + } + } + break; + } + case DATA_TYPE_STRING_ARRAY: { + char **val_array; + + /* check indexed string value of array for match */ + if ((nvpair_value_string_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len)) { + if (value_regex) { + if (regexec(value_regex, val_array[ai], + (size_t)0, NULL, 0) == 0) + return (1); + } else { + if (strcmp(value, val_array[ai]) == 0) + return (1); + } + } + break; + } + case DATA_TYPE_BYTE: { + uchar_t val, val_arg; + + /* scanf uchar_t from value and check for match */ + sr = sscanf(value, "%c", &val_arg); + if ((sr == 1) && 
(nvpair_value_byte(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_BYTE_ARRAY: { + uchar_t *val_array, val_arg; + + + /* check indexed value of array for match */ + sr = sscanf(value, "%c", &val_arg); + if ((sr == 1) && + (nvpair_value_byte_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT8: { + int8_t val, val_arg; + + /* scanf int8_t from value and check for match */ + sr = sscanf(value, "%"SCNi8, &val_arg); + if ((sr == 1) && + (nvpair_value_int8(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT8_ARRAY: { + int8_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi8, &val_arg); + if ((sr == 1) && + (nvpair_value_int8_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT8: { + uint8_t val, val_arg; + + /* scanf uint8_t from value and check for match */ + sr = sscanf(value, "%"SCNi8, (int8_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint8(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi8, (int8_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint8_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT16: { + int16_t val, val_arg; + + /* scanf int16_t from value and check for match */ + sr = sscanf(value, "%"SCNi16, &val_arg); + if ((sr == 1) && + (nvpair_value_int16(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT16_ARRAY: { + int16_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi16, &val_arg); + if ((sr == 1) && + (nvpair_value_int16_array(nvp, 
&val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT16: { + uint16_t val, val_arg; + + /* scanf uint16_t from value and check for match */ + sr = sscanf(value, "%"SCNi16, (int16_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint16(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi16, (int16_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint16_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT32: { + int32_t val, val_arg; + + /* scanf int32_t from value and check for match */ + sr = sscanf(value, "%"SCNi32, &val_arg); + if ((sr == 1) && + (nvpair_value_int32(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT32_ARRAY: { + int32_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi32, &val_arg); + if ((sr == 1) && + (nvpair_value_int32_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT32: { + uint32_t val, val_arg; + + /* scanf uint32_t from value and check for match */ + sr = sscanf(value, "%"SCNi32, (int32_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint32(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi32, (int32_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint32_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT64: { + int64_t val, val_arg; + + /* scanf int64_t from value and check for match */ + sr = sscanf(value, "%"SCNi64, 
&val_arg); + if ((sr == 1) && + (nvpair_value_int64(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_INT64_ARRAY: { + int64_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi64, &val_arg); + if ((sr == 1) && + (nvpair_value_int64_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT64: { + uint64_t val_arg, val; + + /* scanf uint64_t from value and check for match */ + sr = sscanf(value, "%"SCNi64, (int64_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint64(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi64, (int64_t *)&val_arg); + if ((sr == 1) && + (nvpair_value_uint64_array(nvp, &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_BOOLEAN_VALUE: { + boolean_t val, val_arg; + + /* scanf boolean_t from value and check for match */ + sr = sscanf(value, "%"SCNi32, &val_arg); + if ((sr == 1) && + (nvpair_value_boolean_value(nvp, &val) == 0) && + (val == val_arg)) + return (1); + break; + } + case DATA_TYPE_BOOLEAN_ARRAY: { + boolean_t *val_array, val_arg; + + /* check indexed value of array for match */ + sr = sscanf(value, "%"SCNi32, &val_arg); + if ((sr == 1) && + (nvpair_value_boolean_array(nvp, + &val_array, &a_len) == 0) && + (ai < a_len) && + (val_array[ai] == val_arg)) + return (1); + break; + } + case DATA_TYPE_HRTIME: + case DATA_TYPE_NVLIST: + case DATA_TYPE_NVLIST_ARRAY: + case DATA_TYPE_BOOLEAN: + case DATA_TYPE_DOUBLE: + case DATA_TYPE_UNKNOWN: + default: + /* + * unknown/unsupported data type + */ + return (-1); /* error fail match */ + } + + /* + * check to see if sscanf failed conversion, return approximate + * pointer to problem + */ + if (sr != 1) { + if 
(ep) + *ep = value; + return (-1); /* error fail match - syntax */ + } + + return (0); /* fail match */ +} + +int +nvpair_value_match(nvpair_t *nvp, int ai, char *value, char **ep) +{ + return (nvpair_value_match_regex(nvp, ai, value, NULL, ep)); +} diff --git a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h new file mode 100644 index 0000000..b05669e --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h @@ -0,0 +1,196 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#ifndef _LIBNVPAIR_H +#define _LIBNVPAIR_H + +#include <sys/nvpair.h> +#include <stdlib.h> +#include <stdio.h> +#include <regex.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * All interfaces described in this file are private to Solaris, and + * are subject to change at any time and without notice. The public + * nvlist/nvpair interfaces, as documented in manpage sections 3NVPAIR, + * are all imported from <sys/nvpair.h> included above. 
+ */ + +extern int nvpair_value_match(nvpair_t *, int, char *, char **); +extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, + char **); + +extern void nvlist_print(FILE *, nvlist_t *); +extern int nvlist_print_json(FILE *, nvlist_t *); +extern void dump_nvlist(nvlist_t *, int); + +/* + * Private nvlist printing interface that allows the caller some control + * over output rendering (as opposed to nvlist_print and dump_nvlist). + * + * Obtain an opaque nvlist_prtctl_t cookie using nvlist_prtctl_alloc + * (NULL on failure); on return the cookie is set up for default formatting + * and rendering. Pass the cookie to subsequent customisation functions and + * then pass the cookie to nvlist_prt to render the nvlist. Finally, + * use nvlist_prtctl_free to release the cookie. + * + * For all nvlist_lookup_xxx and nvlist_lookup_xxx_array functions + * we have a corresponding brace of functions that appoint replacement + * rendering functions: + * + * extern void nvlist_prtctlop_xxx(nvlist_prtctl_t, + * int (*)(nvlist_prtctl_t ctl, void *private, nvlist_t *nvl, + * const char *name, xxxtype value), void *private) + * + * and + * + * extern void nvlist_prtctlop_xxx_array(nvlist_prtctl_t, + * int (*)(nvlist_prtctl_t ctl, void *private, nvlist_t *nvl, + * const char *name, xxxtype *value, uint_t count), void *private) + * + * where xxxtype is the C datatype corresponding to xxx, eg int8_t for "int8" + * and char * for "string". The function that is appointed to render the + * specified datatype receives as arguments the cookie, the private argument + * registered when it was appointed, the nvlist being rendered, the nvlist + * member name, the value of that member (or a pointer for array function), + * and (for array rendering functions) a count of the number of elements. 
+ */ + +typedef struct nvlist_prtctl *nvlist_prtctl_t; /* opaque */ + +enum nvlist_indent_mode { + NVLIST_INDENT_ABS, /* Absolute indentation */ + NVLIST_INDENT_TABBED /* Indent with tabstops */ +}; + +extern nvlist_prtctl_t nvlist_prtctl_alloc(void); +extern void nvlist_prtctl_free(nvlist_prtctl_t); +extern void nvlist_prt(nvlist_t *, nvlist_prtctl_t); + +/* Output stream */ +extern void nvlist_prtctl_setdest(nvlist_prtctl_t, FILE *); +extern FILE *nvlist_prtctl_getdest(nvlist_prtctl_t); + +/* Indentation mode, start indent, indent increment; default tabbed/0/1 */ +extern void nvlist_prtctl_setindent(nvlist_prtctl_t, enum nvlist_indent_mode, + int, int); +extern void nvlist_prtctl_doindent(nvlist_prtctl_t, int); + +enum nvlist_prtctl_fmt { + NVLIST_FMT_MEMBER_NAME, /* name fmt; default "%s = " */ + NVLIST_FMT_MEMBER_POSTAMBLE, /* after nvlist member; default "\n" */ + NVLIST_FMT_BTWN_ARRAY /* between array members; default " " */ +}; + +extern void nvlist_prtctl_setfmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, + const char *); +extern void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ...); + +/* + * Function prototypes for interfaces that appoint a new rendering function + * for single-valued nvlist members. + * + * A replacement function receives arguments as follows: + * + * nvlist_prtctl_t Print control structure; do not change preferences + * for this object from a print callback function. + * + * void * The function-private cookie argument registered + * when the replacement function was appointed. + * + * nvlist_t * The full nvlist that is being processed. The + * rendering function is called to render a single + * member (name and value passed as below) but it may + * want to reference or incorporate other aspects of + * the full nvlist. 
+ * + * const char * Member name to render + * + * valtype Value of the member to render + * + * The function must return non-zero if it has rendered output for this + * member, or 0 if it wants to default to standard rendering for this + * one member. + */ + +#define NVLIST_PRINTCTL_SVDECL(funcname, valtype) \ + extern void funcname(nvlist_prtctl_t, \ + int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, valtype), \ + void *) + +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean, int); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean_value, boolean_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_byte, uchar_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int8, int8_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint8, uint8_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int16, int16_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint16, uint16_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int32, int32_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint32, uint32_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int64, int64_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint64, uint64_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_double, double); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_string, char *); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_hrtime, hrtime_t); +NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_nvlist, nvlist_t *); + +#undef NVLIST_PRINTCTL_SVDECL /* was just for "clarity" above */ + +/* + * Function prototypes for interfaces that appoint a new rendering function + * for array-valued nvlist members. + * + * One additional argument is taken: uint_t for the number of array elements + * + * Return values as above. 
+ */ +#define NVLIST_PRINTCTL_AVDECL(funcname, vtype) \ + extern void funcname(nvlist_prtctl_t, \ + int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, uint_t), \ + void *) + +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_boolean_array, boolean_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_byte_array, uchar_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int8_array, int8_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint8_array, uint8_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int16_array, int16_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint16_array, uint16_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int32_array, int32_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint32_array, uint32_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int64_array, int64_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint64_array, uint64_t *); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_string_array, char **); +NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_nvlist_array, nvlist_t **); + +#undef NVLIST_PRINTCTL_AVDECL /* was just for "clarity" above */ + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBNVPAIR_H */ diff --git a/cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c b/cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c new file mode 100644 index 0000000..1aefc10 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libnvpair/nvpair_alloc_system.c @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/nvpair.h> +#include <stdlib.h> + +/*ARGSUSED*/ +static void * +nv_alloc_sys(nv_alloc_t *nva, size_t size) +{ + return (malloc(size)); +} + +/*ARGSUSED*/ +static void +nv_free_sys(nv_alloc_t *nva, void *buf, size_t size) +{ + free(buf); +} + +const nv_alloc_ops_t system_ops_def = { + NULL, /* nv_ao_init() */ + NULL, /* nv_ao_fini() */ + nv_alloc_sys, /* nv_ao_alloc() */ + nv_free_sys, /* nv_ao_free() */ + NULL /* nv_ao_reset() */ +}; + +nv_alloc_t nv_alloc_nosleep_def = { + &system_ops_def, + NULL +}; + +nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def; diff --git a/cddl/contrib/opensolaris/lib/libnvpair/nvpair_json.c b/cddl/contrib/opensolaris/lib/libnvpair/nvpair_json.c new file mode 100644 index 0000000..7cece36 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libnvpair/nvpair_json.c @@ -0,0 +1,403 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright (c) 2014, Joyent, Inc. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <wchar.h> +#include <sys/debug.h> + +#include "libnvpair.h" + +#define FPRINTF(fp, ...) 
\ + do { \ + if (fprintf(fp, __VA_ARGS__) < 0) \ + return (-1); \ + } while (0) + +/* + * When formatting a string for JSON output we must escape certain characters, + * as described in RFC4627. This applies to both member names and + * DATA_TYPE_STRING values. + * + * This function will only operate correctly if the following conditions are + * met: + * + * 1. The input String is encoded in the current locale. + * + * 2. The current locale includes the Basic Multilingual Plane (plane 0) + * as defined in the Unicode standard. + * + * The output will be entirely 7-bit ASCII (as a subset of UTF-8) with all + * representable Unicode characters included in their escaped numeric form. + */ +static int +nvlist_print_json_string(FILE *fp, const char *input) +{ + mbstate_t mbr; + wchar_t c; + size_t sz; + + bzero(&mbr, sizeof (mbr)); + + FPRINTF(fp, "\""); + while ((sz = mbrtowc(&c, input, MB_CUR_MAX, &mbr)) > 0) { + switch (c) { + case '"': + FPRINTF(fp, "\\\""); + break; + case '\n': + FPRINTF(fp, "\\n"); + break; + case '\r': + FPRINTF(fp, "\\r"); + break; + case '\\': + FPRINTF(fp, "\\\\"); + break; + case '\f': + FPRINTF(fp, "\\f"); + break; + case '\t': + FPRINTF(fp, "\\t"); + break; + case '\b': + FPRINTF(fp, "\\b"); + break; + default: + if ((c >= 0x00 && c <= 0x1f) || + (c > 0x7f && c <= 0xffff)) { + /* + * Render both Control Characters and Unicode + * characters in the Basic Multilingual Plane + * as JSON-escaped multibyte characters. + */ + FPRINTF(fp, "\\u%04x", (int)(0xffff & c)); + } else if (c >= 0x20 && c <= 0x7f) { + /* + * Render other 7-bit ASCII characters directly + * and drop other, unrepresentable characters. + */ + FPRINTF(fp, "%c", (int)(0xff & c)); + } + break; + } + input += sz; + } + + if (sz == (size_t)-1 || sz == (size_t)-2) { + /* + * We last read an invalid multibyte character sequence, + * so return an error. 
+ */ + return (-1); + } + + FPRINTF(fp, "\""); + return (0); +} + +/* + * Dump a JSON-formatted representation of an nvlist to the provided FILE *. + * This routine does not output any new-lines or additional whitespace other + * than that contained in strings, nor does it call fflush(3C). + */ +int +nvlist_print_json(FILE *fp, nvlist_t *nvl) +{ + nvpair_t *curr; + boolean_t first = B_TRUE; + + FPRINTF(fp, "{"); + + for (curr = nvlist_next_nvpair(nvl, NULL); curr; + curr = nvlist_next_nvpair(nvl, curr)) { + data_type_t type = nvpair_type(curr); + + if (!first) + FPRINTF(fp, ","); + else + first = B_FALSE; + + if (nvlist_print_json_string(fp, nvpair_name(curr)) == -1) + return (-1); + FPRINTF(fp, ":"); + + switch (type) { + case DATA_TYPE_STRING: { + char *string = fnvpair_value_string(curr); + if (nvlist_print_json_string(fp, string) == -1) + return (-1); + break; + } + + case DATA_TYPE_BOOLEAN: { + FPRINTF(fp, "true"); + break; + } + + case DATA_TYPE_BOOLEAN_VALUE: { + FPRINTF(fp, "%s", fnvpair_value_boolean_value(curr) == + B_TRUE ? 
"true" : "false"); + break; + } + + case DATA_TYPE_BYTE: { + FPRINTF(fp, "%hhu", fnvpair_value_byte(curr)); + break; + } + + case DATA_TYPE_INT8: { + FPRINTF(fp, "%hhd", fnvpair_value_int8(curr)); + break; + } + + case DATA_TYPE_UINT8: { + FPRINTF(fp, "%hhu", fnvpair_value_uint8_t(curr)); + break; + } + + case DATA_TYPE_INT16: { + FPRINTF(fp, "%hd", fnvpair_value_int16(curr)); + break; + } + + case DATA_TYPE_UINT16: { + FPRINTF(fp, "%hu", fnvpair_value_uint16(curr)); + break; + } + + case DATA_TYPE_INT32: { + FPRINTF(fp, "%d", fnvpair_value_int32(curr)); + break; + } + + case DATA_TYPE_UINT32: { + FPRINTF(fp, "%u", fnvpair_value_uint32(curr)); + break; + } + + case DATA_TYPE_INT64: { + FPRINTF(fp, "%lld", + (long long)fnvpair_value_int64(curr)); + break; + } + + case DATA_TYPE_UINT64: { + FPRINTF(fp, "%llu", + (unsigned long long)fnvpair_value_uint64(curr)); + break; + } + + case DATA_TYPE_HRTIME: { + hrtime_t val; + VERIFY0(nvpair_value_hrtime(curr, &val)); + FPRINTF(fp, "%llu", (unsigned long long)val); + break; + } + + case DATA_TYPE_DOUBLE: { + double val; + VERIFY0(nvpair_value_double(curr, &val)); + FPRINTF(fp, "%f", val); + break; + } + + case DATA_TYPE_NVLIST: { + if (nvlist_print_json(fp, + fnvpair_value_nvlist(curr)) == -1) + return (-1); + break; + } + + case DATA_TYPE_STRING_ARRAY: { + char **val; + uint_t valsz, i; + VERIFY0(nvpair_value_string_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + if (nvlist_print_json_string(fp, val[i]) == -1) + return (-1); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **val; + uint_t valsz, i; + VERIFY0(nvpair_value_nvlist_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + if (nvlist_print_json(fp, val[i]) == -1) + return (-1); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_BOOLEAN_ARRAY: { + boolean_t *val; + uint_t valsz, i; + 
VERIFY0(nvpair_value_boolean_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, val[i] == B_TRUE ? + "true" : "false"); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_BYTE_ARRAY: { + uchar_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_byte_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%hhu", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_uint8_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%hhu", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_INT8_ARRAY: { + int8_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_int8_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%hhd", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_uint16_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%hu", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_INT16_ARRAY: { + int16_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_int16_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%hd", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_uint32_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%u", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_INT32_ARRAY: { + int32_t *val; + uint_t valsz, i; + 
VERIFY0(nvpair_value_int32_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%d", val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_uint64_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%llu", + (unsigned long long)val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_INT64_ARRAY: { + int64_t *val; + uint_t valsz, i; + VERIFY0(nvpair_value_int64_array(curr, &val, &valsz)); + FPRINTF(fp, "["); + for (i = 0; i < valsz; i++) { + if (i > 0) + FPRINTF(fp, ","); + FPRINTF(fp, "%lld", (long long)val[i]); + } + FPRINTF(fp, "]"); + break; + } + + case DATA_TYPE_UNKNOWN: + return (-1); + } + } + + FPRINTF(fp, "}"); + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h new file mode 100644 index 0000000..7a5f8a8 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h @@ -0,0 +1,391 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _LIBUUTIL_H +#define _LIBUUTIL_H + +#include <solaris.h> +#include <sys/types.h> +#include <stdarg.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Standard flags codes. + */ +#define UU_DEFAULT 0 + +/* + * Standard error codes. + */ +#define UU_ERROR_NONE 0 /* no error */ +#define UU_ERROR_INVALID_ARGUMENT 1 /* invalid argument */ +#define UU_ERROR_UNKNOWN_FLAG 2 /* passed flag invalid */ +#define UU_ERROR_NO_MEMORY 3 /* out of memory */ +#define UU_ERROR_CALLBACK_FAILED 4 /* callback-initiated error */ +#define UU_ERROR_NOT_SUPPORTED 5 /* operation not supported */ +#define UU_ERROR_EMPTY 6 /* no value provided */ +#define UU_ERROR_UNDERFLOW 7 /* value is too small */ +#define UU_ERROR_OVERFLOW 8 /* value is too large */ +#define UU_ERROR_INVALID_CHAR 9 /* value contains unexpected char */ +#define UU_ERROR_INVALID_DIGIT 10 /* value contains digit not in base */ + +#define UU_ERROR_SYSTEM 99 /* underlying system error */ +#define UU_ERROR_UNKNOWN 100 /* error status not known */ + +/* + * Standard program exit codes. + */ +#define UU_EXIT_OK (*(uu_exit_ok())) +#define UU_EXIT_FATAL (*(uu_exit_fatal())) +#define UU_EXIT_USAGE (*(uu_exit_usage())) + +/* + * Exit status profiles. + */ +#define UU_PROFILE_DEFAULT 0 +#define UU_PROFILE_LAUNCHER 1 + +/* + * Error reporting functions. + */ +uint32_t uu_error(void); +const char *uu_strerror(uint32_t); + +/* + * Program notification functions.
+ */ +extern void uu_alt_exit(int); +extern const char *uu_setpname(char *); +extern const char *uu_getpname(void); +/*PRINTFLIKE1*/ +extern void uu_warn(const char *, ...); +extern void uu_vwarn(const char *, va_list); +/*PRINTFLIKE1*/ +extern void uu_die(const char *, ...) __NORETURN; +extern void uu_vdie(const char *, va_list) __NORETURN; +/*PRINTFLIKE2*/ +extern void uu_xdie(int, const char *, ...) __NORETURN; +extern void uu_vxdie(int, const char *, va_list) __NORETURN; + +/* + * Exit status functions (not to be used directly) + */ +extern int *uu_exit_ok(void); +extern int *uu_exit_fatal(void); +extern int *uu_exit_usage(void); + +/* + * string->number conversions + */ +extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t); +extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t); + +/* + * Debug print facility functions. + */ +typedef struct uu_dprintf uu_dprintf_t; + +typedef enum { + UU_DPRINTF_SILENT, + UU_DPRINTF_FATAL, + UU_DPRINTF_WARNING, + UU_DPRINTF_NOTICE, + UU_DPRINTF_INFO, + UU_DPRINTF_DEBUG +} uu_dprintf_severity_t; + +extern uu_dprintf_t *uu_dprintf_create(const char *, uu_dprintf_severity_t, + uint_t); +/*PRINTFLIKE3*/ +extern void uu_dprintf(uu_dprintf_t *, uu_dprintf_severity_t, + const char *, ...); +extern void uu_dprintf_destroy(uu_dprintf_t *); +extern const char *uu_dprintf_getname(uu_dprintf_t *); + +/* + * Identifier test flags and function. + */ +#define UU_NAME_DOMAIN 0x1 /* allow SUNW, or com.sun, prefix */ +#define UU_NAME_PATH 0x2 /* allow '/'-delimited paths */ + +int uu_check_name(const char *, uint_t); + +/* + * File creation functions. + */ +extern int uu_open_tmp(const char *dir, uint_t uflags); + +/* + * Convenience functions. 
+ */ +#define UU_NELEM(a) (sizeof (a) / sizeof ((a)[0])) + +/*PRINTFLIKE1*/ +extern char *uu_msprintf(const char *format, ...); +extern void *uu_zalloc(size_t); +extern char *uu_strdup(const char *); +extern void uu_free(void *); + +extern boolean_t uu_strcaseeq(const char *a, const char *b); +extern boolean_t uu_streq(const char *a, const char *b); +extern char *uu_strndup(const char *s, size_t n); +extern boolean_t uu_strbw(const char *a, const char *b); +extern void *uu_memdup(const void *buf, size_t sz); +extern void uu_dump(FILE *out, const char *prefix, const void *buf, size_t len); + +/* + * Comparison function type definition. + * Developers should be careful in their use of the _private argument. If you + * break interface guarantees, you get undefined behavior. + */ +typedef int uu_compare_fn_t(const void *__left, const void *__right, + void *__private); + +/* + * Walk variant flags. + * A data structure need not provide support for all variants and + * combinations. Refer to the appropriate documentation. + */ +#define UU_WALK_ROBUST 0x00000001 /* walk can survive removes */ +#define UU_WALK_REVERSE 0x00000002 /* reverse walk order */ + +#define UU_WALK_PREORDER 0x00000010 /* walk tree in pre-order */ +#define UU_WALK_POSTORDER 0x00000020 /* walk tree in post-order */ + +/* + * Walk callback function return codes. + */ +#define UU_WALK_ERROR -1 +#define UU_WALK_NEXT 0 +#define UU_WALK_DONE 1 + +/* + * Walk callback function type definition. + */ +typedef int uu_walk_fn_t(void *_elem, void *_private); + +/* + * lists: opaque structures + */ +typedef struct uu_list_pool uu_list_pool_t; +typedef struct uu_list uu_list_t; + +typedef struct uu_list_node { + uintptr_t uln_opaque[2]; +} uu_list_node_t; + +typedef struct uu_list_walk uu_list_walk_t; + +typedef uintptr_t uu_list_index_t; + +/* + * lists: interface + * + * basic usage: + * typedef struct foo { + * ... + * uu_list_node_t foo_node; + * ... 
+ * } foo_t; + * + * static int + * foo_compare(void *l_arg, void *r_arg, void *private) + * { + * foo_t *l = l_arg; + * foo_t *r = r_arg; + * + * if (... l greater than r ...) + * return (1); + * if (... l less than r ...) + * return (-1); + * return (0); + * } + * + * ... + * // at initialization time + * foo_pool = uu_list_pool_create("foo_pool", + * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare, + * debugging? UU_LIST_POOL_DEBUG : 0); + * ... + */ +uu_list_pool_t *uu_list_pool_create(const char *, size_t, size_t, + uu_compare_fn_t *, uint32_t); +#define UU_LIST_POOL_DEBUG 0x00000001 + +void uu_list_pool_destroy(uu_list_pool_t *); + +/* + * usage: + * + * foo_t *a; + * a = malloc(sizeof(*a)); + * uu_list_node_init(a, &a->foo_list, pool); + * ... + * uu_list_node_fini(a, &a->foo_list, pool); + * free(a); + */ +void uu_list_node_init(void *, uu_list_node_t *, uu_list_pool_t *); +void uu_list_node_fini(void *, uu_list_node_t *, uu_list_pool_t *); + +uu_list_t *uu_list_create(uu_list_pool_t *, void *_parent, uint32_t); +#define UU_LIST_DEBUG 0x00000001 +#define UU_LIST_SORTED 0x00000002 /* list is sorted */ + +void uu_list_destroy(uu_list_t *); /* list must be empty */ + +size_t uu_list_numnodes(uu_list_t *); + +void *uu_list_first(uu_list_t *); +void *uu_list_last(uu_list_t *); + +void *uu_list_next(uu_list_t *, void *); +void *uu_list_prev(uu_list_t *, void *); + +int uu_list_walk(uu_list_t *, uu_walk_fn_t *, void *, uint32_t); + +uu_list_walk_t *uu_list_walk_start(uu_list_t *, uint32_t); +void *uu_list_walk_next(uu_list_walk_t *); +void uu_list_walk_end(uu_list_walk_t *); + +void *uu_list_find(uu_list_t *, void *, void *, uu_list_index_t *); +void uu_list_insert(uu_list_t *, void *, uu_list_index_t); + +void *uu_list_nearest_next(uu_list_t *, uu_list_index_t); +void *uu_list_nearest_prev(uu_list_t *, uu_list_index_t); + +void *uu_list_teardown(uu_list_t *, void **); + +void uu_list_remove(uu_list_t *, void *); + +/* + * lists: interfaces for non-sorted
lists only + */ +int uu_list_insert_before(uu_list_t *, void *_target, void *_elem); +int uu_list_insert_after(uu_list_t *, void *_target, void *_elem); + +/* + * avl trees: opaque structures + */ +typedef struct uu_avl_pool uu_avl_pool_t; +typedef struct uu_avl uu_avl_t; + +typedef struct uu_avl_node { +#ifdef _LP64 + uintptr_t uan_opaque[3]; +#else + uintptr_t uan_opaque[4]; +#endif +} uu_avl_node_t; + +typedef struct uu_avl_walk uu_avl_walk_t; + +typedef uintptr_t uu_avl_index_t; + +/* + * avl trees: interface + * + * basic usage: + * typedef struct foo { + * ... + * uu_avl_node_t foo_node; + * ... + * } foo_t; + * + * static int + * foo_compare(void *l_arg, void *r_arg, void *private) + * { + * foo_t *l = l_arg; + * foo_t *r = r_arg; + * + * if (... l greater than r ...) + * return (1); + * if (... l less than r ...) + * return (-1); + * return (0); + * } + * + * ... + * // at initialization time + * foo_pool = uu_avl_pool_create("foo_pool", + * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare, + * debugging? UU_AVL_POOL_DEBUG : 0); + * ... + */ +uu_avl_pool_t *uu_avl_pool_create(const char *, size_t, size_t, + uu_compare_fn_t *, uint32_t); +#define UU_AVL_POOL_DEBUG 0x00000001 + +void uu_avl_pool_destroy(uu_avl_pool_t *); + +/* + * usage: + * + * foo_t *a; + * a = malloc(sizeof(*a)); + * uu_avl_node_init(a, &a->foo_avl, pool); + * ...
+ * uu_avl_node_fini(a, &a->foo_avl, pool); + * free(a); + */ +void uu_avl_node_init(void *, uu_avl_node_t *, uu_avl_pool_t *); +void uu_avl_node_fini(void *, uu_avl_node_t *, uu_avl_pool_t *); + +uu_avl_t *uu_avl_create(uu_avl_pool_t *, void *_parent, uint32_t); +#define UU_AVL_DEBUG 0x00000001 + +void uu_avl_destroy(uu_avl_t *); /* tree must be empty */ + +size_t uu_avl_numnodes(uu_avl_t *); + +void *uu_avl_first(uu_avl_t *); +void *uu_avl_last(uu_avl_t *); + +void *uu_avl_next(uu_avl_t *, void *); +void *uu_avl_prev(uu_avl_t *, void *); + +int uu_avl_walk(uu_avl_t *, uu_walk_fn_t *, void *, uint32_t); + +uu_avl_walk_t *uu_avl_walk_start(uu_avl_t *, uint32_t); +void *uu_avl_walk_next(uu_avl_walk_t *); +void uu_avl_walk_end(uu_avl_walk_t *); + +void *uu_avl_find(uu_avl_t *, void *, void *, uu_avl_index_t *); +void uu_avl_insert(uu_avl_t *, void *, uu_avl_index_t); + +void *uu_avl_nearest_next(uu_avl_t *, uu_avl_index_t); +void *uu_avl_nearest_prev(uu_avl_t *, uu_avl_index_t); + +void *uu_avl_teardown(uu_avl_t *, void **); + +void uu_avl_remove(uu_avl_t *, void *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBUUTIL_H */ diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h new file mode 100644 index 0000000..9ebaaed --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_common.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBUUTIL_COMMON_H +#define _LIBUUTIL_COMMON_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libuutil.h> +#include <libuutil_impl.h> + +#endif /* _LIBUUTIL_COMMON_H */ diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h new file mode 100644 index 0000000..9466e59 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil_impl.h @@ -0,0 +1,181 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBUUTIL_IMPL_H +#define _LIBUUTIL_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libuutil.h> +#include <pthread.h> + +#include <sys/avl_impl.h> +#include <sys/byteorder.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void uu_set_error(uint_t); +#pragma rarely_called(uu_set_error) + +/*PRINTFLIKE1*/ +void uu_panic(const char *format, ...); +#pragma rarely_called(uu_panic) + +struct uu_dprintf { + char *uud_name; + uu_dprintf_severity_t uud_severity; + uint_t uud_flags; +}; + +/* + * For debugging purposes, libuutil keeps around linked lists of all uu_lists + * and uu_avls, along with pointers to their parents. These can cause false + * negatives when looking for memory leaks, so we encode the pointers by + * storing them with swapped endianness; this is not perfect, but it's about + * the best we can do without wasting a lot of space. + */ +#ifdef _LP64 +#define UU_PTR_ENCODE(ptr) BSWAP_64((uintptr_t)(void *)(ptr)) +#else +#define UU_PTR_ENCODE(ptr) BSWAP_32((uintptr_t)(void *)(ptr)) +#endif + +#define UU_PTR_DECODE(ptr) ((void *)UU_PTR_ENCODE(ptr)) + +/* + * uu_list structures + */ +typedef struct uu_list_node_impl { + struct uu_list_node_impl *uln_next; + struct uu_list_node_impl *uln_prev; +} uu_list_node_impl_t; + +struct uu_list_walk { + uu_list_walk_t *ulw_next; + uu_list_walk_t *ulw_prev; + + uu_list_t *ulw_list; + int8_t ulw_dir; + uint8_t ulw_robust; + uu_list_node_impl_t *ulw_next_result; +}; + +struct uu_list { + uintptr_t ul_next_enc; + uintptr_t ul_prev_enc; + + uu_list_pool_t *ul_pool; + uintptr_t ul_parent_enc; /* encoded parent pointer */ + size_t ul_offset; + size_t ul_numnodes; + uint8_t ul_debug; + uint8_t ul_sorted; + uint8_t ul_index; /* mark for uu_list_index_ts */ + + uu_list_node_impl_t ul_null_node; + uu_list_walk_t ul_null_walk; /* for robust walkers */ +}; + +#define UU_LIST_PTR(ptr) ((uu_list_t *)UU_PTR_DECODE(ptr)) + +#define UU_LIST_POOL_MAXNAME 64 + +struct uu_list_pool { + uu_list_pool_t *ulp_next; + 
uu_list_pool_t *ulp_prev; + + char ulp_name[UU_LIST_POOL_MAXNAME]; + size_t ulp_nodeoffset; + size_t ulp_objsize; + uu_compare_fn_t *ulp_cmp; + uint8_t ulp_debug; + uint8_t ulp_last_index; + pthread_mutex_t ulp_lock; /* protects null_list */ + uu_list_t ulp_null_list; +}; + +/* + * uu_avl structures + */ +typedef struct avl_node uu_avl_node_impl_t; + +struct uu_avl_walk { + uu_avl_walk_t *uaw_next; + uu_avl_walk_t *uaw_prev; + + uu_avl_t *uaw_avl; + void *uaw_next_result; + int8_t uaw_dir; + uint8_t uaw_robust; +}; + +struct uu_avl { + uintptr_t ua_next_enc; + uintptr_t ua_prev_enc; + + uu_avl_pool_t *ua_pool; + uintptr_t ua_parent_enc; + uint8_t ua_debug; + uint8_t ua_index; /* mark for uu_avl_index_ts */ + + struct avl_tree ua_tree; + uu_avl_walk_t ua_null_walk; +}; + +#define UU_AVL_PTR(x) ((uu_avl_t *)UU_PTR_DECODE(x)) + +#define UU_AVL_POOL_MAXNAME 64 + +struct uu_avl_pool { + uu_avl_pool_t *uap_next; + uu_avl_pool_t *uap_prev; + + char uap_name[UU_AVL_POOL_MAXNAME]; + size_t uap_nodeoffset; + size_t uap_objsize; + uu_compare_fn_t *uap_cmp; + uint8_t uap_debug; + uint8_t uap_last_index; + pthread_mutex_t uap_lock; /* protects null_avl */ + uu_avl_t uap_null_avl; +}; + +/* + * atfork() handlers + */ +void uu_avl_lockup(void); +void uu_avl_release(void); + +void uu_list_lockup(void); +void uu_list_release(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBUUTIL_IMPL_H */ diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c new file mode 100644 index 0000000..2bef759 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include "libuutil_common.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void * +uu_zalloc(size_t n) +{ + void *p = malloc(n); + + if (p == NULL) { + uu_set_error(UU_ERROR_SYSTEM); + return (NULL); + } + + (void) memset(p, 0, n); + + return (p); +} + +void +uu_free(void *p) +{ + free(p); +} + +char * +uu_strdup(const char *str) +{ + char *buf = NULL; + + if (str != NULL) { + size_t sz; + + sz = strlen(str) + 1; + buf = uu_zalloc(sz); + if (buf != NULL) + (void) memcpy(buf, str, sz); + } + return (buf); +} + +/* + * Duplicate up to n bytes of a string. Kind of sort of like + * strdup(strlcpy(s, n)). + */ +char * +uu_strndup(const char *s, size_t n) +{ + size_t len; + char *p; + + len = strnlen(s, n); + p = uu_zalloc(len + 1); + if (p == NULL) + return (NULL); + + if (len > 0) + (void) memcpy(p, s, len); + p[len] = '\0'; + + return (p); +} + +/* + * Duplicate a block of memory. Combines malloc with memcpy, much as + * strdup combines malloc, strlen, and strcpy. + */ +void * +uu_memdup(const void *buf, size_t sz) +{ + void *p; + + p = uu_zalloc(sz); + if (p == NULL) + return (NULL); + (void) memcpy(p, buf, sz); + return (p); +} + +char * +uu_msprintf(const char *format, ...) 
+{ + va_list args; + char attic[1]; + int M, m; + char *b; + + va_start(args, format); + M = vsnprintf(attic, 1, format, args); + va_end(args); + + for (;;) { + m = M; + if (m < 0 || (b = uu_zalloc((size_t)m + 1)) == NULL) + return (NULL); + + va_start(args, format); + M = vsnprintf(b, (size_t)m + 1, format, args); + va_end(args); + + if (M == m) + break; /* sizes match */ + + uu_free(b); + } + + return (b); +} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c new file mode 100644 index 0000000..5e78ece --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_avl.c @@ -0,0 +1,570 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libuutil_common.h" + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/avl.h> + +static uu_avl_pool_t uu_null_apool = { &uu_null_apool, &uu_null_apool }; +static pthread_mutex_t uu_apool_list_lock = PTHREAD_MUTEX_INITIALIZER; + +/* + * The index mark changes on every insert and delete, to catch stale + * references. 
+ * + * We leave the low bit alone, since the avl code uses it. + */ +#define INDEX_MAX (sizeof (uintptr_t) - 2) +#define INDEX_NEXT(m) (((m) == INDEX_MAX)? 2 : ((m) + 2) & INDEX_MAX) + +#define INDEX_DECODE(i) ((i) & ~INDEX_MAX) +#define INDEX_ENCODE(p, n) (((n) & ~INDEX_MAX) | (p)->ua_index) +#define INDEX_VALID(p, i) (((i) & INDEX_MAX) == (p)->ua_index) +#define INDEX_CHECK(i) (((i) & INDEX_MAX) != 0) + +/* + * When an element is inactive (not in a tree), we keep a marked pointer to + * its containing pool in its first word, and a NULL pointer in its second. + * + * On insert, we use these to verify that it comes from the correct pool. + */ +#define NODE_ARRAY(p, n) ((uintptr_t *)((uintptr_t)(n) + \ + (p)->uap_nodeoffset)) + +#define POOL_TO_MARKER(pp) (((uintptr_t)(pp) | 1)) + +#define DEAD_MARKER 0xc4 + +uu_avl_pool_t * +uu_avl_pool_create(const char *name, size_t objsize, size_t nodeoffset, + uu_compare_fn_t *compare_func, uint32_t flags) +{ + uu_avl_pool_t *pp, *next, *prev; + + if (name == NULL || + uu_check_name(name, UU_NAME_DOMAIN) == -1 || + nodeoffset + sizeof (uu_avl_node_t) > objsize || + compare_func == NULL) { + uu_set_error(UU_ERROR_INVALID_ARGUMENT); + return (NULL); + } + + if (flags & ~UU_AVL_POOL_DEBUG) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (NULL); + } + + pp = uu_zalloc(sizeof (uu_avl_pool_t)); + if (pp == NULL) { + uu_set_error(UU_ERROR_NO_MEMORY); + return (NULL); + } + + (void) strlcpy(pp->uap_name, name, sizeof (pp->uap_name)); + pp->uap_nodeoffset = nodeoffset; + pp->uap_objsize = objsize; + pp->uap_cmp = compare_func; + if (flags & UU_AVL_POOL_DEBUG) + pp->uap_debug = 1; + pp->uap_last_index = 0; + + (void) pthread_mutex_init(&pp->uap_lock, NULL); + + pp->uap_null_avl.ua_next_enc = UU_PTR_ENCODE(&pp->uap_null_avl); + pp->uap_null_avl.ua_prev_enc = UU_PTR_ENCODE(&pp->uap_null_avl); + + (void) pthread_mutex_lock(&uu_apool_list_lock); + pp->uap_next = next = &uu_null_apool; + pp->uap_prev = prev = next->uap_prev; + 
next->uap_prev = pp; + prev->uap_next = pp; + (void) pthread_mutex_unlock(&uu_apool_list_lock); + + return (pp); +} + +void +uu_avl_pool_destroy(uu_avl_pool_t *pp) +{ + if (pp->uap_debug) { + if (pp->uap_null_avl.ua_next_enc != + UU_PTR_ENCODE(&pp->uap_null_avl) || + pp->uap_null_avl.ua_prev_enc != + UU_PTR_ENCODE(&pp->uap_null_avl)) { + uu_panic("uu_avl_pool_destroy: Pool \"%.*s\" (%p) has " + "outstanding avls, or is corrupt.\n", + (int)sizeof (pp->uap_name), pp->uap_name, + (void *)pp); + } + } + (void) pthread_mutex_lock(&uu_apool_list_lock); + pp->uap_next->uap_prev = pp->uap_prev; + pp->uap_prev->uap_next = pp->uap_next; + (void) pthread_mutex_unlock(&uu_apool_list_lock); + (void) pthread_mutex_destroy(&pp->uap_lock); + pp->uap_prev = NULL; + pp->uap_next = NULL; + uu_free(pp); +} + +void +uu_avl_node_init(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp) +{ + uintptr_t *na = (uintptr_t *)np; + + if (pp->uap_debug) { + uintptr_t offset = (uintptr_t)np - (uintptr_t)base; + if (offset + sizeof (*np) > pp->uap_objsize) { + uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): " + "offset %ld doesn't fit in object (size %ld)\n", + base, (void *)np, (void *)pp, pp->uap_name, + (long)offset, (long)pp->uap_objsize); + } + if (offset != pp->uap_nodeoffset) { + uu_panic("uu_avl_node_init(%p, %p, %p (\"%s\")): " + "offset %ld doesn't match pool's offset (%ld)\n", + base, (void *)np, (void *)pp, pp->uap_name, + (long)offset, (long)pp->uap_nodeoffset); + } + } + + na[0] = POOL_TO_MARKER(pp); + na[1] = 0; +} + +void +uu_avl_node_fini(void *base, uu_avl_node_t *np, uu_avl_pool_t *pp) +{ + uintptr_t *na = (uintptr_t *)np; + + if (pp->uap_debug) { + if (na[0] == DEAD_MARKER && na[1] == DEAD_MARKER) { + uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): " + "node already finied\n", + base, (void *)np, (void *)pp, pp->uap_name); + } + if (na[0] != POOL_TO_MARKER(pp) || na[1] != 0) { + uu_panic("uu_avl_node_fini(%p, %p, %p (\"%s\")): " + "node corrupt, in tree, or in different pool\n", + 
base, (void *)np, (void *)pp, pp->uap_name); + } + } + + na[0] = DEAD_MARKER; + na[1] = DEAD_MARKER; + na[2] = DEAD_MARKER; +} + +struct uu_avl_node_compare_info { + uu_compare_fn_t *ac_compare; + void *ac_private; + void *ac_right; + void *ac_found; +}; + +static int +uu_avl_node_compare(const void *l, const void *r) +{ + struct uu_avl_node_compare_info *info = + (struct uu_avl_node_compare_info *)l; + + int res = info->ac_compare(r, info->ac_right, info->ac_private); + + if (res == 0) { + if (info->ac_found == NULL) + info->ac_found = (void *)r; + return (-1); + } + if (res < 0) + return (1); + return (-1); +} + +uu_avl_t * +uu_avl_create(uu_avl_pool_t *pp, void *parent, uint32_t flags) +{ + uu_avl_t *ap, *next, *prev; + + if (flags & ~UU_AVL_DEBUG) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (NULL); + } + + ap = uu_zalloc(sizeof (*ap)); + if (ap == NULL) { + uu_set_error(UU_ERROR_NO_MEMORY); + return (NULL); + } + + ap->ua_pool = pp; + ap->ua_parent_enc = UU_PTR_ENCODE(parent); + ap->ua_debug = pp->uap_debug || (flags & UU_AVL_DEBUG); + ap->ua_index = (pp->uap_last_index = INDEX_NEXT(pp->uap_last_index)); + + avl_create(&ap->ua_tree, &uu_avl_node_compare, pp->uap_objsize, + pp->uap_nodeoffset); + + ap->ua_null_walk.uaw_next = &ap->ua_null_walk; + ap->ua_null_walk.uaw_prev = &ap->ua_null_walk; + + (void) pthread_mutex_lock(&pp->uap_lock); + next = &pp->uap_null_avl; + prev = UU_PTR_DECODE(next->ua_prev_enc); + ap->ua_next_enc = UU_PTR_ENCODE(next); + ap->ua_prev_enc = UU_PTR_ENCODE(prev); + next->ua_prev_enc = UU_PTR_ENCODE(ap); + prev->ua_next_enc = UU_PTR_ENCODE(ap); + (void) pthread_mutex_unlock(&pp->uap_lock); + + return (ap); +} + +void +uu_avl_destroy(uu_avl_t *ap) +{ + uu_avl_pool_t *pp = ap->ua_pool; + + if (ap->ua_debug) { + if (avl_numnodes(&ap->ua_tree) != 0) { + uu_panic("uu_avl_destroy(%p): tree not empty\n", + (void *)ap); + } + if (ap->ua_null_walk.uaw_next != &ap->ua_null_walk || + ap->ua_null_walk.uaw_prev != &ap->ua_null_walk) { + 
uu_panic("uu_avl_destroy(%p): outstanding walkers\n", + (void *)ap); + } + } + (void) pthread_mutex_lock(&pp->uap_lock); + UU_AVL_PTR(ap->ua_next_enc)->ua_prev_enc = ap->ua_prev_enc; + UU_AVL_PTR(ap->ua_prev_enc)->ua_next_enc = ap->ua_next_enc; + (void) pthread_mutex_unlock(&pp->uap_lock); + ap->ua_prev_enc = UU_PTR_ENCODE(NULL); + ap->ua_next_enc = UU_PTR_ENCODE(NULL); + + ap->ua_pool = NULL; + avl_destroy(&ap->ua_tree); + + uu_free(ap); +} + +size_t +uu_avl_numnodes(uu_avl_t *ap) +{ + return (avl_numnodes(&ap->ua_tree)); +} + +void * +uu_avl_first(uu_avl_t *ap) +{ + return (avl_first(&ap->ua_tree)); +} + +void * +uu_avl_last(uu_avl_t *ap) +{ + return (avl_last(&ap->ua_tree)); +} + +void * +uu_avl_next(uu_avl_t *ap, void *node) +{ + return (AVL_NEXT(&ap->ua_tree, node)); +} + +void * +uu_avl_prev(uu_avl_t *ap, void *node) +{ + return (AVL_PREV(&ap->ua_tree, node)); +} + +static void +_avl_walk_init(uu_avl_walk_t *wp, uu_avl_t *ap, uint32_t flags) +{ + uu_avl_walk_t *next, *prev; + + int robust = (flags & UU_WALK_ROBUST); + int direction = (flags & UU_WALK_REVERSE)? -1 : 1; + + (void) memset(wp, 0, sizeof (*wp)); + wp->uaw_avl = ap; + wp->uaw_robust = robust; + wp->uaw_dir = direction; + + if (direction > 0) + wp->uaw_next_result = avl_first(&ap->ua_tree); + else + wp->uaw_next_result = avl_last(&ap->ua_tree); + + if (ap->ua_debug || robust) { + wp->uaw_next = next = &ap->ua_null_walk; + wp->uaw_prev = prev = next->uaw_prev; + next->uaw_prev = wp; + prev->uaw_next = wp; + } +} + +static void * +_avl_walk_advance(uu_avl_walk_t *wp, uu_avl_t *ap) +{ + void *np = wp->uaw_next_result; + + avl_tree_t *t = &ap->ua_tree; + + if (np == NULL) + return (NULL); + + wp->uaw_next_result = (wp->uaw_dir > 0)? 
AVL_NEXT(t, np) : + AVL_PREV(t, np); + + return (np); +} + +static void +_avl_walk_fini(uu_avl_walk_t *wp) +{ + if (wp->uaw_next != NULL) { + wp->uaw_next->uaw_prev = wp->uaw_prev; + wp->uaw_prev->uaw_next = wp->uaw_next; + wp->uaw_next = NULL; + wp->uaw_prev = NULL; + } + wp->uaw_avl = NULL; + wp->uaw_next_result = NULL; +} + +uu_avl_walk_t * +uu_avl_walk_start(uu_avl_t *ap, uint32_t flags) +{ + uu_avl_walk_t *wp; + + if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (NULL); + } + + wp = uu_zalloc(sizeof (*wp)); + if (wp == NULL) { + uu_set_error(UU_ERROR_NO_MEMORY); + return (NULL); + } + + _avl_walk_init(wp, ap, flags); + return (wp); +} + +void * +uu_avl_walk_next(uu_avl_walk_t *wp) +{ + return (_avl_walk_advance(wp, wp->uaw_avl)); +} + +void +uu_avl_walk_end(uu_avl_walk_t *wp) +{ + _avl_walk_fini(wp); + uu_free(wp); +} + +int +uu_avl_walk(uu_avl_t *ap, uu_walk_fn_t *func, void *private, uint32_t flags) +{ + void *e; + uu_avl_walk_t my_walk; + + int status = UU_WALK_NEXT; + + if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (-1); + } + + _avl_walk_init(&my_walk, ap, flags); + while (status == UU_WALK_NEXT && + (e = _avl_walk_advance(&my_walk, ap)) != NULL) + status = (*func)(e, private); + _avl_walk_fini(&my_walk); + + if (status >= 0) + return (0); + uu_set_error(UU_ERROR_CALLBACK_FAILED); + return (-1); +} + +void +uu_avl_remove(uu_avl_t *ap, void *elem) +{ + uu_avl_walk_t *wp; + uu_avl_pool_t *pp = ap->ua_pool; + uintptr_t *na = NODE_ARRAY(pp, elem); + + if (ap->ua_debug) { + /* + * invalidate outstanding uu_avl_index_ts. + */ + ap->ua_index = INDEX_NEXT(ap->ua_index); + } + + /* + * Robust walkers must be advanced, if we are removing the node + * they are currently using. In debug mode, non-robust walkers + * are also on the walker list. 
+ */ + for (wp = ap->ua_null_walk.uaw_next; wp != &ap->ua_null_walk; + wp = wp->uaw_next) { + if (wp->uaw_robust) { + if (elem == wp->uaw_next_result) + (void) _avl_walk_advance(wp, ap); + } else if (wp->uaw_next_result != NULL) { + uu_panic("uu_avl_remove(%p, %p): active non-robust " + "walker\n", (void *)ap, elem); + } + } + + avl_remove(&ap->ua_tree, elem); + + na[0] = POOL_TO_MARKER(pp); + na[1] = 0; +} + +void * +uu_avl_teardown(uu_avl_t *ap, void **cookie) +{ + void *elem = avl_destroy_nodes(&ap->ua_tree, cookie); + + if (elem != NULL) { + uu_avl_pool_t *pp = ap->ua_pool; + uintptr_t *na = NODE_ARRAY(pp, elem); + + na[0] = POOL_TO_MARKER(pp); + na[1] = 0; + } + return (elem); +} + +void * +uu_avl_find(uu_avl_t *ap, void *elem, void *private, uu_avl_index_t *out) +{ + struct uu_avl_node_compare_info info; + void *result; + + info.ac_compare = ap->ua_pool->uap_cmp; + info.ac_private = private; + info.ac_right = elem; + info.ac_found = NULL; + + result = avl_find(&ap->ua_tree, &info, out); + if (out != NULL) + *out = INDEX_ENCODE(ap, *out); + + if (ap->ua_debug && result != NULL) + uu_panic("uu_avl_find: internal error: avl_find succeeded\n"); + + return (info.ac_found); +} + +void +uu_avl_insert(uu_avl_t *ap, void *elem, uu_avl_index_t idx) +{ + if (ap->ua_debug) { + uu_avl_pool_t *pp = ap->ua_pool; + uintptr_t *na = NODE_ARRAY(pp, elem); + + if (na[1] != 0) + uu_panic("uu_avl_insert(%p, %p, %p): node already " + "in tree, or corrupt\n", + (void *)ap, elem, (void *)idx); + if (na[0] == 0) + uu_panic("uu_avl_insert(%p, %p, %p): node not " + "initialized\n", + (void *)ap, elem, (void *)idx); + if (na[0] != POOL_TO_MARKER(pp)) + uu_panic("uu_avl_insert(%p, %p, %p): node from " + "other pool, or corrupt\n", + (void *)ap, elem, (void *)idx); + + if (!INDEX_VALID(ap, idx)) + uu_panic("uu_avl_insert(%p, %p, %p): %s\n", + (void *)ap, elem, (void *)idx, + INDEX_CHECK(idx)? "outdated index" : + "invalid index"); + + /* + * invalidate outstanding uu_avl_index_ts. 
+ */ + ap->ua_index = INDEX_NEXT(ap->ua_index); + } + avl_insert(&ap->ua_tree, elem, INDEX_DECODE(idx)); +} + +void * +uu_avl_nearest_next(uu_avl_t *ap, uu_avl_index_t idx) +{ + if (ap->ua_debug && !INDEX_VALID(ap, idx)) + uu_panic("uu_avl_nearest_next(%p, %p): %s\n", + (void *)ap, (void *)idx, INDEX_CHECK(idx)? + "outdated index" : "invalid index"); + return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_AFTER)); +} + +void * +uu_avl_nearest_prev(uu_avl_t *ap, uu_avl_index_t idx) +{ + if (ap->ua_debug && !INDEX_VALID(ap, idx)) + uu_panic("uu_avl_nearest_prev(%p, %p): %s\n", + (void *)ap, (void *)idx, INDEX_CHECK(idx)? + "outdated index" : "invalid index"); + return (avl_nearest(&ap->ua_tree, INDEX_DECODE(idx), AVL_BEFORE)); +} + +/* + * called from uu_lockup() and uu_release(), as part of our fork1()-safety. + */ +void +uu_avl_lockup(void) +{ + uu_avl_pool_t *pp; + + (void) pthread_mutex_lock(&uu_apool_list_lock); + for (pp = uu_null_apool.uap_next; pp != &uu_null_apool; + pp = pp->uap_next) + (void) pthread_mutex_lock(&pp->uap_lock); +} + +void +uu_avl_release(void) +{ + uu_avl_pool_t *pp; + + for (pp = uu_null_apool.uap_next; pp != &uu_null_apool; + pp = pp->uap_next) + (void) pthread_mutex_unlock(&pp->uap_lock); + (void) pthread_mutex_unlock(&uu_apool_list_lock); +} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c new file mode 100644 index 0000000..528c3e7 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_dprintf.c @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libuutil_common.h" + +#include <errno.h> +#include <libintl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define FACILITY_FMT "%s (%s): " + +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + +static const char * +strseverity(uu_dprintf_severity_t severity) +{ + switch (severity) { + case UU_DPRINTF_SILENT: + return (dgettext(TEXT_DOMAIN, "silent")); + case UU_DPRINTF_FATAL: + return (dgettext(TEXT_DOMAIN, "FATAL")); + case UU_DPRINTF_WARNING: + return (dgettext(TEXT_DOMAIN, "WARNING")); + case UU_DPRINTF_NOTICE: + return (dgettext(TEXT_DOMAIN, "note")); + case UU_DPRINTF_INFO: + return (dgettext(TEXT_DOMAIN, "info")); + case UU_DPRINTF_DEBUG: + return (dgettext(TEXT_DOMAIN, "debug")); + default: + return (dgettext(TEXT_DOMAIN, "unspecified")); + } +} + +uu_dprintf_t * +uu_dprintf_create(const char *name, uu_dprintf_severity_t severity, + uint_t flags) +{ + uu_dprintf_t *D; + + if (uu_check_name(name, UU_NAME_DOMAIN) == -1) { + uu_set_error(UU_ERROR_INVALID_ARGUMENT); + return (NULL); + } + + if ((D = uu_zalloc(sizeof (uu_dprintf_t))) == NULL) + return (NULL); + + if (name != NULL) { + D->uud_name = strdup(name); + if (D->uud_name == NULL) { + uu_free(D); + return (NULL); + } + } else { + D->uud_name = NULL; + } + + D->uud_severity = severity; + D->uud_flags = flags; 
+ + return (D); +} + +/*PRINTFLIKE3*/ +void +uu_dprintf(uu_dprintf_t *D, uu_dprintf_severity_t severity, + const char *format, ...) +{ + va_list alist; + + /* XXX Assert that severity is not UU_DPRINTF_SILENT. */ + + if (severity > D->uud_severity) + return; + + (void) fprintf(stderr, FACILITY_FMT, D->uud_name, + strseverity(severity)); + + va_start(alist, format); + (void) vfprintf(stderr, format, alist); + va_end(alist); +} + +void +uu_dprintf_destroy(uu_dprintf_t *D) +{ + if (D->uud_name) + free(D->uud_name); + + uu_free(D); +} + +const char * +uu_dprintf_getname(uu_dprintf_t *D) +{ + return (D->uud_name); +} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c new file mode 100644 index 0000000..9a64384 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_ident.c @@ -0,0 +1,122 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libuutil_common.h" + +#include <string.h> + +/* + * We require names of the form: + * [provider,]identifier[/[provider,]identifier]... + * + * Where provider is either a stock symbol (SUNW) or a java-style reversed + * domain name (com.sun). + * + * Both providers and identifiers must start with a letter, and may + * only contain alphanumerics, dashes, and underlines. Providers + * may also contain periods. + * + * Note that we do _not_ use the macros in <ctype.h>, since they are affected + * by the current locale settings. + */ + +#define IS_ALPHA(c) \ + (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) + +#define IS_DIGIT(c) \ + ((c) >= '0' && (c) <= '9') + +static int +is_valid_ident(const char *s, const char *e, int allowdot) +{ + char c; + + if (s >= e) + return (0); /* name is empty */ + + c = *s++; + if (!IS_ALPHA(c)) + return (0); /* does not start with letter */ + + while (s < e && (c = *s++) != 0) { + if (IS_ALPHA(c) || IS_DIGIT(c) || c == '-' || c == '_' || + (allowdot && c == '.')) + continue; + return (0); /* invalid character */ + } + return (1); +} + +static int +is_valid_component(const char *b, const char *e, uint_t flags) +{ + char *sp; + + if (flags & UU_NAME_DOMAIN) { + sp = strchr(b, ','); + if (sp != NULL && sp < e) { + if (!is_valid_ident(b, sp, 1)) + return (0); + b = sp + 1; + } + } + + return (is_valid_ident(b, e, 0)); +} + +int +uu_check_name(const char *name, uint_t flags) +{ + const char *end = name + strlen(name); + const char *p; + + if (flags & ~(UU_NAME_DOMAIN | UU_NAME_PATH)) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (-1); + } + + if (!(flags & UU_NAME_PATH)) { + if (!is_valid_component(name, end, flags)) + goto bad; + return (0); + } + + while ((p = strchr(name, '/')) != NULL) { + if (!is_valid_component(name, p, flags)) + goto bad; + name = p + 1; + } + if (!is_valid_component(name, end, flags)) + goto bad; + + return (0); + +bad: + 
uu_set_error(UU_ERROR_INVALID_ARGUMENT); + return (-1); +} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c new file mode 100644 index 0000000..35c7ba8 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_list.c @@ -0,0 +1,718 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libuutil_common.h" + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/time.h> + +#define ELEM_TO_NODE(lp, e) \ + ((uu_list_node_impl_t *)((uintptr_t)(e) + (lp)->ul_offset)) + +#define NODE_TO_ELEM(lp, n) \ + ((void *)((uintptr_t)(n) - (lp)->ul_offset)) + +/* + * uu_list_index_ts define a location for insertion. They are simply a + * pointer to the object after the insertion point. We store a mark + * in the low-bits of the index, to help prevent mistakes. + * + * When debugging, the index mark changes on every insert and delete, to + * catch stale references. 
+ */ +#define INDEX_MAX (sizeof (uintptr_t) - 1) +#define INDEX_NEXT(m) (((m) == INDEX_MAX)? 1 : ((m) + 1) & INDEX_MAX) + +#define INDEX_TO_NODE(i) ((uu_list_node_impl_t *)((i) & ~INDEX_MAX)) +#define NODE_TO_INDEX(p, n) (((uintptr_t)(n) & ~INDEX_MAX) | (p)->ul_index) +#define INDEX_VALID(p, i) (((i) & INDEX_MAX) == (p)->ul_index) +#define INDEX_CHECK(i) (((i) & INDEX_MAX) != 0) + +#define POOL_TO_MARKER(pp) ((void *)((uintptr_t)(pp) | 1)) + +static uu_list_pool_t uu_null_lpool = { &uu_null_lpool, &uu_null_lpool }; +static pthread_mutex_t uu_lpool_list_lock = PTHREAD_MUTEX_INITIALIZER; + +uu_list_pool_t * +uu_list_pool_create(const char *name, size_t objsize, + size_t nodeoffset, uu_compare_fn_t *compare_func, uint32_t flags) +{ + uu_list_pool_t *pp, *next, *prev; + + if (name == NULL || + uu_check_name(name, UU_NAME_DOMAIN) == -1 || + nodeoffset + sizeof (uu_list_node_t) > objsize) { + uu_set_error(UU_ERROR_INVALID_ARGUMENT); + return (NULL); + } + + if (flags & ~UU_LIST_POOL_DEBUG) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (NULL); + } + + pp = uu_zalloc(sizeof (uu_list_pool_t)); + if (pp == NULL) { + uu_set_error(UU_ERROR_NO_MEMORY); + return (NULL); + } + + (void) strlcpy(pp->ulp_name, name, sizeof (pp->ulp_name)); + pp->ulp_nodeoffset = nodeoffset; + pp->ulp_objsize = objsize; + pp->ulp_cmp = compare_func; + if (flags & UU_LIST_POOL_DEBUG) + pp->ulp_debug = 1; + pp->ulp_last_index = 0; + + (void) pthread_mutex_init(&pp->ulp_lock, NULL); + + pp->ulp_null_list.ul_next_enc = UU_PTR_ENCODE(&pp->ulp_null_list); + pp->ulp_null_list.ul_prev_enc = UU_PTR_ENCODE(&pp->ulp_null_list); + + (void) pthread_mutex_lock(&uu_lpool_list_lock); + pp->ulp_next = next = &uu_null_lpool; + pp->ulp_prev = prev = next->ulp_prev; + next->ulp_prev = pp; + prev->ulp_next = pp; + (void) pthread_mutex_unlock(&uu_lpool_list_lock); + + return (pp); +} + +void +uu_list_pool_destroy(uu_list_pool_t *pp) +{ + if (pp->ulp_debug) { + if (pp->ulp_null_list.ul_next_enc != + 
UU_PTR_ENCODE(&pp->ulp_null_list) || + pp->ulp_null_list.ul_prev_enc != + UU_PTR_ENCODE(&pp->ulp_null_list)) { + uu_panic("uu_list_pool_destroy: Pool \"%.*s\" (%p) has " + "outstanding lists, or is corrupt.\n", + (int)sizeof (pp->ulp_name), pp->ulp_name, + (void *)pp); + } + } + (void) pthread_mutex_lock(&uu_lpool_list_lock); + pp->ulp_next->ulp_prev = pp->ulp_prev; + pp->ulp_prev->ulp_next = pp->ulp_next; + (void) pthread_mutex_unlock(&uu_lpool_list_lock); + pp->ulp_prev = NULL; + pp->ulp_next = NULL; + uu_free(pp); +} + +void +uu_list_node_init(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp) +{ + uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg; + + if (pp->ulp_debug) { + uintptr_t offset = (uintptr_t)np - (uintptr_t)base; + if (offset + sizeof (*np) > pp->ulp_objsize) { + uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): " + "offset %ld doesn't fit in object (size %ld)\n", + base, (void *)np, (void *)pp, pp->ulp_name, + (long)offset, (long)pp->ulp_objsize); + } + if (offset != pp->ulp_nodeoffset) { + uu_panic("uu_list_node_init(%p, %p, %p (\"%s\")): " + "offset %ld doesn't match pool's offset (%ld)\n", + base, (void *)np, (void *)pp, pp->ulp_name, + (long)offset, (long)pp->ulp_nodeoffset); + } + } + np->uln_next = POOL_TO_MARKER(pp); + np->uln_prev = NULL; +} + +void +uu_list_node_fini(void *base, uu_list_node_t *np_arg, uu_list_pool_t *pp) +{ + uu_list_node_impl_t *np = (uu_list_node_impl_t *)np_arg; + + if (pp->ulp_debug) { + if (np->uln_next == NULL && + np->uln_prev == NULL) { + uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): " + "node already finied\n", + base, (void *)np_arg, (void *)pp, pp->ulp_name); + } + if (np->uln_next != POOL_TO_MARKER(pp) || + np->uln_prev != NULL) { + uu_panic("uu_list_node_fini(%p, %p, %p (\"%s\")): " + "node corrupt or on list\n", + base, (void *)np_arg, (void *)pp, pp->ulp_name); + } + } + np->uln_next = NULL; + np->uln_prev = NULL; +} + +uu_list_t * +uu_list_create(uu_list_pool_t *pp, void *parent, uint32_t 
flags) +{ + uu_list_t *lp, *next, *prev; + + if (flags & ~(UU_LIST_DEBUG | UU_LIST_SORTED)) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (NULL); + } + + if ((flags & UU_LIST_SORTED) && pp->ulp_cmp == NULL) { + if (pp->ulp_debug) + uu_panic("uu_list_create(%p, ...): requested " + "UU_LIST_SORTED, but pool has no comparison func\n", + (void *)pp); + uu_set_error(UU_ERROR_NOT_SUPPORTED); + return (NULL); + } + + lp = uu_zalloc(sizeof (*lp)); + if (lp == NULL) { + uu_set_error(UU_ERROR_NO_MEMORY); + return (NULL); + } + + lp->ul_pool = pp; + lp->ul_parent_enc = UU_PTR_ENCODE(parent); + lp->ul_offset = pp->ulp_nodeoffset; + lp->ul_debug = pp->ulp_debug || (flags & UU_LIST_DEBUG); + lp->ul_sorted = (flags & UU_LIST_SORTED); + lp->ul_numnodes = 0; + lp->ul_index = (pp->ulp_last_index = INDEX_NEXT(pp->ulp_last_index)); + + lp->ul_null_node.uln_next = &lp->ul_null_node; + lp->ul_null_node.uln_prev = &lp->ul_null_node; + + lp->ul_null_walk.ulw_next = &lp->ul_null_walk; + lp->ul_null_walk.ulw_prev = &lp->ul_null_walk; + + (void) pthread_mutex_lock(&pp->ulp_lock); + next = &pp->ulp_null_list; + prev = UU_PTR_DECODE(next->ul_prev_enc); + lp->ul_next_enc = UU_PTR_ENCODE(next); + lp->ul_prev_enc = UU_PTR_ENCODE(prev); + next->ul_prev_enc = UU_PTR_ENCODE(lp); + prev->ul_next_enc = UU_PTR_ENCODE(lp); + (void) pthread_mutex_unlock(&pp->ulp_lock); + + return (lp); +} + +void +uu_list_destroy(uu_list_t *lp) +{ + uu_list_pool_t *pp = lp->ul_pool; + + if (lp->ul_debug) { + if (lp->ul_null_node.uln_next != &lp->ul_null_node || + lp->ul_null_node.uln_prev != &lp->ul_null_node) { + uu_panic("uu_list_destroy(%p): list not empty\n", + (void *)lp); + } + if (lp->ul_numnodes != 0) { + uu_panic("uu_list_destroy(%p): numnodes is nonzero, " + "but list is empty\n", (void *)lp); + } + if (lp->ul_null_walk.ulw_next != &lp->ul_null_walk || + lp->ul_null_walk.ulw_prev != &lp->ul_null_walk) { + uu_panic("uu_list_destroy(%p): outstanding walkers\n", + (void *)lp); + } + } + + (void) 
pthread_mutex_lock(&pp->ulp_lock); + UU_LIST_PTR(lp->ul_next_enc)->ul_prev_enc = lp->ul_prev_enc; + UU_LIST_PTR(lp->ul_prev_enc)->ul_next_enc = lp->ul_next_enc; + (void) pthread_mutex_unlock(&pp->ulp_lock); + lp->ul_prev_enc = UU_PTR_ENCODE(NULL); + lp->ul_next_enc = UU_PTR_ENCODE(NULL); + lp->ul_pool = NULL; + uu_free(lp); +} + +static void +list_insert(uu_list_t *lp, uu_list_node_impl_t *np, uu_list_node_impl_t *prev, + uu_list_node_impl_t *next) +{ + if (lp->ul_debug) { + if (next->uln_prev != prev || prev->uln_next != next) + uu_panic("insert(%p): internal error: %p and %p not " + "neighbors\n", (void *)lp, (void *)next, + (void *)prev); + + if (np->uln_next != POOL_TO_MARKER(lp->ul_pool) || + np->uln_prev != NULL) { + uu_panic("insert(%p): elem %p node %p corrupt, " + "not initialized, or already in a list.\n", + (void *)lp, NODE_TO_ELEM(lp, np), (void *)np); + } + /* + * invalidate outstanding uu_list_index_ts. + */ + lp->ul_index = INDEX_NEXT(lp->ul_index); + } + np->uln_next = next; + np->uln_prev = prev; + next->uln_prev = np; + prev->uln_next = np; + + lp->ul_numnodes++; +} + +void +uu_list_insert(uu_list_t *lp, void *elem, uu_list_index_t idx) +{ + uu_list_node_impl_t *np; + + np = INDEX_TO_NODE(idx); + if (np == NULL) + np = &lp->ul_null_node; + + if (lp->ul_debug) { + if (!INDEX_VALID(lp, idx)) + uu_panic("uu_list_insert(%p, %p, %p): %s\n", + (void *)lp, elem, (void *)idx, + INDEX_CHECK(idx)? 
"outdated index" : + "invalid index"); + if (np->uln_prev == NULL) + uu_panic("uu_list_insert(%p, %p, %p): out-of-date " + "index\n", (void *)lp, elem, (void *)idx); + } + + list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np); +} + +void * +uu_list_find(uu_list_t *lp, void *elem, void *private, uu_list_index_t *out) +{ + int sorted = lp->ul_sorted; + uu_compare_fn_t *func = lp->ul_pool->ulp_cmp; + uu_list_node_impl_t *np; + + if (func == NULL) { + if (out != NULL) + *out = 0; + uu_set_error(UU_ERROR_NOT_SUPPORTED); + return (NULL); + } + for (np = lp->ul_null_node.uln_next; np != &lp->ul_null_node; + np = np->uln_next) { + void *ep = NODE_TO_ELEM(lp, np); + int cmp = func(ep, elem, private); + if (cmp == 0) { + if (out != NULL) + *out = NODE_TO_INDEX(lp, np); + return (ep); + } + if (sorted && cmp > 0) { + if (out != NULL) + *out = NODE_TO_INDEX(lp, np); + return (NULL); + } + } + if (out != NULL) + *out = NODE_TO_INDEX(lp, 0); + return (NULL); +} + +void * +uu_list_nearest_next(uu_list_t *lp, uu_list_index_t idx) +{ + uu_list_node_impl_t *np = INDEX_TO_NODE(idx); + + if (np == NULL) + np = &lp->ul_null_node; + + if (lp->ul_debug) { + if (!INDEX_VALID(lp, idx)) + uu_panic("uu_list_nearest_next(%p, %p): %s\n", + (void *)lp, (void *)idx, + INDEX_CHECK(idx)? "outdated index" : + "invalid index"); + if (np->uln_prev == NULL) + uu_panic("uu_list_nearest_next(%p, %p): out-of-date " + "index\n", (void *)lp, (void *)idx); + } + + if (np == &lp->ul_null_node) + return (NULL); + else + return (NODE_TO_ELEM(lp, np)); +} + +void * +uu_list_nearest_prev(uu_list_t *lp, uu_list_index_t idx) +{ + uu_list_node_impl_t *np = INDEX_TO_NODE(idx); + + if (np == NULL) + np = &lp->ul_null_node; + + if (lp->ul_debug) { + if (!INDEX_VALID(lp, idx)) + uu_panic("uu_list_nearest_prev(%p, %p): %s\n", + (void *)lp, (void *)idx, INDEX_CHECK(idx)? 
+ "outdated index" : "invalid index"); + if (np->uln_prev == NULL) + uu_panic("uu_list_nearest_prev(%p, %p): out-of-date " + "index\n", (void *)lp, (void *)idx); + } + + if ((np = np->uln_prev) == &lp->ul_null_node) + return (NULL); + else + return (NODE_TO_ELEM(lp, np)); +} + +static void +list_walk_init(uu_list_walk_t *wp, uu_list_t *lp, uint32_t flags) +{ + uu_list_walk_t *next, *prev; + + int robust = (flags & UU_WALK_ROBUST); + int direction = (flags & UU_WALK_REVERSE)? -1 : 1; + + (void) memset(wp, 0, sizeof (*wp)); + wp->ulw_list = lp; + wp->ulw_robust = robust; + wp->ulw_dir = direction; + if (direction > 0) + wp->ulw_next_result = lp->ul_null_node.uln_next; + else + wp->ulw_next_result = lp->ul_null_node.uln_prev; + + if (lp->ul_debug || robust) { + /* + * Add this walker to the list's list of walkers so + * uu_list_remove() can advance us if somebody tries to + * remove ulw_next_result. + */ + wp->ulw_next = next = &lp->ul_null_walk; + wp->ulw_prev = prev = next->ulw_prev; + next->ulw_prev = wp; + prev->ulw_next = wp; + } +} + +static uu_list_node_impl_t * +list_walk_advance(uu_list_walk_t *wp, uu_list_t *lp) +{ + uu_list_node_impl_t *np = wp->ulw_next_result; + uu_list_node_impl_t *next; + + if (np == &lp->ul_null_node) + return (NULL); + + next = (wp->ulw_dir > 0)? np->uln_next : np->uln_prev; + + wp->ulw_next_result = next; + return (np); +} + +static void +list_walk_fini(uu_list_walk_t *wp) +{ + /* GLXXX debugging? 
*/ + if (wp->ulw_next != NULL) { + wp->ulw_next->ulw_prev = wp->ulw_prev; + wp->ulw_prev->ulw_next = wp->ulw_next; + wp->ulw_next = NULL; + wp->ulw_prev = NULL; + } + wp->ulw_list = NULL; + wp->ulw_next_result = NULL; +} + +uu_list_walk_t * +uu_list_walk_start(uu_list_t *lp, uint32_t flags) +{ + uu_list_walk_t *wp; + + if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (NULL); + } + + wp = uu_zalloc(sizeof (*wp)); + if (wp == NULL) { + uu_set_error(UU_ERROR_NO_MEMORY); + return (NULL); + } + + list_walk_init(wp, lp, flags); + return (wp); +} + +void * +uu_list_walk_next(uu_list_walk_t *wp) +{ + uu_list_t *lp = wp->ulw_list; + uu_list_node_impl_t *np = list_walk_advance(wp, lp); + + if (np == NULL) + return (NULL); + + return (NODE_TO_ELEM(lp, np)); +} + +void +uu_list_walk_end(uu_list_walk_t *wp) +{ + list_walk_fini(wp); + uu_free(wp); +} + +int +uu_list_walk(uu_list_t *lp, uu_walk_fn_t *func, void *private, uint32_t flags) +{ + uu_list_node_impl_t *np; + + int status = UU_WALK_NEXT; + + int robust = (flags & UU_WALK_ROBUST); + int reverse = (flags & UU_WALK_REVERSE); + + if (flags & ~(UU_WALK_ROBUST | UU_WALK_REVERSE)) { + uu_set_error(UU_ERROR_UNKNOWN_FLAG); + return (-1); + } + + if (lp->ul_debug || robust) { + uu_list_walk_t my_walk; + void *e; + + list_walk_init(&my_walk, lp, flags); + while (status == UU_WALK_NEXT && + (e = uu_list_walk_next(&my_walk)) != NULL) + status = (*func)(e, private); + list_walk_fini(&my_walk); + } else { + if (!reverse) { + for (np = lp->ul_null_node.uln_next; + status == UU_WALK_NEXT && np != &lp->ul_null_node; + np = np->uln_next) { + status = (*func)(NODE_TO_ELEM(lp, np), private); + } + } else { + for (np = lp->ul_null_node.uln_prev; + status == UU_WALK_NEXT && np != &lp->ul_null_node; + np = np->uln_prev) { + status = (*func)(NODE_TO_ELEM(lp, np), private); + } + } + } + if (status >= 0) + return (0); + uu_set_error(UU_ERROR_CALLBACK_FAILED); + return (-1); +} + +void 
+uu_list_remove(uu_list_t *lp, void *elem) +{ + uu_list_node_impl_t *np = ELEM_TO_NODE(lp, elem); + uu_list_walk_t *wp; + + if (lp->ul_debug) { + if (np->uln_prev == NULL) + uu_panic("uu_list_remove(%p, %p): elem not on list\n", + (void *)lp, elem); + /* + * invalidate outstanding uu_list_index_ts. + */ + lp->ul_index = INDEX_NEXT(lp->ul_index); + } + + /* + * robust walkers must be advanced. In debug mode, non-robust + * walkers are also on the list. If there are any, it's an error. + */ + for (wp = lp->ul_null_walk.ulw_next; wp != &lp->ul_null_walk; + wp = wp->ulw_next) { + if (wp->ulw_robust) { + if (np == wp->ulw_next_result) + (void) list_walk_advance(wp, lp); + } else if (wp->ulw_next_result != NULL) { + uu_panic("uu_list_remove(%p, %p): active non-robust " + "walker\n", (void *)lp, elem); + } + } + + np->uln_next->uln_prev = np->uln_prev; + np->uln_prev->uln_next = np->uln_next; + + lp->ul_numnodes--; + + np->uln_next = POOL_TO_MARKER(lp->ul_pool); + np->uln_prev = NULL; +} + +void * +uu_list_teardown(uu_list_t *lp, void **cookie) +{ + void *ep; + + /* + * XXX: disable list modification until list is empty + */ + if (lp->ul_debug && *cookie != NULL) + uu_panic("uu_list_teardown(%p, %p): unexpected cookie\n", + (void *)lp, (void *)cookie); + + ep = uu_list_first(lp); + if (ep) + uu_list_remove(lp, ep); + return (ep); +} + +int +uu_list_insert_before(uu_list_t *lp, void *target, void *elem) +{ + uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target); + + if (target == NULL) + np = &lp->ul_null_node; + + if (lp->ul_debug) { + if (np->uln_prev == NULL) + uu_panic("uu_list_insert_before(%p, %p, %p): %p is " + "not currently on a list\n", + (void *)lp, target, elem, target); + } + if (lp->ul_sorted) { + if (lp->ul_debug) + uu_panic("uu_list_insert_before(%p, ...): list is " + "UU_LIST_SORTED\n", (void *)lp); + uu_set_error(UU_ERROR_NOT_SUPPORTED); + return (-1); + } + + list_insert(lp, ELEM_TO_NODE(lp, elem), np->uln_prev, np); + return (0); +} + +int 
+uu_list_insert_after(uu_list_t *lp, void *target, void *elem) +{ + uu_list_node_impl_t *np = ELEM_TO_NODE(lp, target); + + if (target == NULL) + np = &lp->ul_null_node; + + if (lp->ul_debug) { + if (np->uln_prev == NULL) + uu_panic("uu_list_insert_after(%p, %p, %p): %p is " + "not currently on a list\n", + (void *)lp, target, elem, target); + } + if (lp->ul_sorted) { + if (lp->ul_debug) + uu_panic("uu_list_insert_after(%p, ...): list is " + "UU_LIST_SORTED\n", (void *)lp); + uu_set_error(UU_ERROR_NOT_SUPPORTED); + return (-1); + } + + list_insert(lp, ELEM_TO_NODE(lp, elem), np, np->uln_next); + return (0); +} + +size_t +uu_list_numnodes(uu_list_t *lp) +{ + return (lp->ul_numnodes); +} + +void * +uu_list_first(uu_list_t *lp) +{ + uu_list_node_impl_t *n = lp->ul_null_node.uln_next; + if (n == &lp->ul_null_node) + return (NULL); + return (NODE_TO_ELEM(lp, n)); +} + +void * +uu_list_last(uu_list_t *lp) +{ + uu_list_node_impl_t *n = lp->ul_null_node.uln_prev; + if (n == &lp->ul_null_node) + return (NULL); + return (NODE_TO_ELEM(lp, n)); +} + +void * +uu_list_next(uu_list_t *lp, void *elem) +{ + uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem); + + n = n->uln_next; + if (n == &lp->ul_null_node) + return (NULL); + return (NODE_TO_ELEM(lp, n)); +} + +void * +uu_list_prev(uu_list_t *lp, void *elem) +{ + uu_list_node_impl_t *n = ELEM_TO_NODE(lp, elem); + + n = n->uln_prev; + if (n == &lp->ul_null_node) + return (NULL); + return (NODE_TO_ELEM(lp, n)); +} + +/* + * called from uu_lockup() and uu_release(), as part of our fork1()-safety. 
+ */ +void +uu_list_lockup(void) +{ + uu_list_pool_t *pp; + + (void) pthread_mutex_lock(&uu_lpool_list_lock); + for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool; + pp = pp->ulp_next) + (void) pthread_mutex_lock(&pp->ulp_lock); +} + +void +uu_list_release(void) +{ + uu_list_pool_t *pp; + + for (pp = uu_null_lpool.ulp_next; pp != &uu_null_lpool; + pp = pp->ulp_next) + (void) pthread_mutex_unlock(&pp->ulp_lock); + (void) pthread_mutex_unlock(&uu_lpool_list_lock); +} diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c new file mode 100644 index 0000000..b673834 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c @@ -0,0 +1,277 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
+ */
+
+#include "libuutil_common.h"
+
+#define HAVE_ASSFAIL 1
+
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/debug.h>
+#include <thread.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#if !defined(TEXT_DOMAIN)
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
+/*
+ * All of the old code under !defined(PTHREAD_ONCE_KEY_NP)
+ * is here to enable the building of a native version of
+ * libuutil.so when the build machine has not yet been upgraded
+ * to a version of libc that provides pthread_key_create_once_np().
+ * It should all be deleted when solaris_nevada ships.
+ * The code is not MT-safe in a relaxed memory model.
+ */
+
+#if defined(PTHREAD_ONCE_KEY_NP)
+static pthread_key_t uu_error_key = PTHREAD_ONCE_KEY_NP;
+#else /* PTHREAD_ONCE_KEY_NP */
+static pthread_key_t uu_error_key = 0;
+static pthread_mutex_t uu_key_lock = PTHREAD_MUTEX_INITIALIZER;
+#endif /* PTHREAD_ONCE_KEY_NP */
+
+/* 0: key not created yet, 1: key usable, -1: key creation failed */
+static int uu_error_key_setup = 0;
+
+static pthread_mutex_t uu_panic_lock = PTHREAD_MUTEX_INITIALIZER;
+/* LINTED static unused */
+static const char *uu_panic_format;
+/* LINTED static unused */
+static va_list uu_panic_args;
+static pthread_t uu_panic_thread;
+
+static uint32_t _uu_main_error;
+
+/*
+ * Store `code' as the calling thread's libuutil error.
+ *
+ * The thread-specific key is created on first use.  If the key cannot be
+ * created, uu_error_key_setup is set to -1 and the code is not stored.
+ */
+void
+uu_set_error(uint_t code)
+{
+
+#if defined(PTHREAD_ONCE_KEY_NP)
+	if (pthread_key_create_once_np(&uu_error_key, NULL) != 0)
+		uu_error_key_setup = -1;
+	else
+		uu_error_key_setup = 1;
+#else /* PTHREAD_ONCE_KEY_NP */
+	/* test, lock, test again: only one thread creates the key */
+	if (uu_error_key_setup == 0) {
+		(void) pthread_mutex_lock(&uu_key_lock);
+		if (uu_error_key_setup == 0) {
+			if (pthread_key_create(&uu_error_key, NULL) != 0)
+				uu_error_key_setup = -1;
+			else
+				uu_error_key_setup = 1;
+		}
+		(void) pthread_mutex_unlock(&uu_key_lock);
+	}
+#endif /* PTHREAD_ONCE_KEY_NP */
+	if (uu_error_key_setup > 0)
+		(void) pthread_setspecific(uu_error_key,
+		    (void *)(uintptr_t)code);
+}
+
+/*
+ * Return the error code stored for the calling thread by uu_set_error(),
+ * or UU_ERROR_UNKNOWN if creating the thread-specific key failed.
+ */
+uint32_t
+uu_error(void)
+{
+
+	if (uu_error_key_setup < 0)	/* can't happen? */
+		return (UU_ERROR_UNKNOWN);
+
+	/*
+	 * Because UU_ERROR_NONE == 0, if uu_set_error() was
+	 * never called, then this will return UU_ERROR_NONE:
+	 */
+	return ((uint32_t)(uintptr_t)pthread_getspecific(uu_error_key));
+}
+
+/*
+ * Translate a libuutil error code into a message obtained through
+ * dgettext(TEXT_DOMAIN, ...).
+ *
+ * Returns NULL and sets errno to ESRCH when `code' is not one of the
+ * codes handled below.
+ */
+const char *
+uu_strerror(uint32_t code)
+{
+	const char *str;
+
+	switch (code) {
+	case UU_ERROR_NONE:
+		str = dgettext(TEXT_DOMAIN, "No error");
+		break;
+
+	case UU_ERROR_INVALID_ARGUMENT:
+		str = dgettext(TEXT_DOMAIN, "Invalid argument");
+		break;
+
+	case UU_ERROR_UNKNOWN_FLAG:
+		str = dgettext(TEXT_DOMAIN, "Unknown flag passed");
+		break;
+
+	case UU_ERROR_NO_MEMORY:
+		str = dgettext(TEXT_DOMAIN, "Out of memory");
+		break;
+
+	case UU_ERROR_CALLBACK_FAILED:
+		str = dgettext(TEXT_DOMAIN, "Callback-initiated failure");
+		break;
+
+	case UU_ERROR_NOT_SUPPORTED:
+		str = dgettext(TEXT_DOMAIN, "Operation not supported");
+		break;
+
+	case UU_ERROR_EMPTY:
+		str = dgettext(TEXT_DOMAIN, "No value provided");
+		break;
+
+	case UU_ERROR_UNDERFLOW:
+		str = dgettext(TEXT_DOMAIN, "Value too small");
+		break;
+
+	case UU_ERROR_OVERFLOW:
+		str = dgettext(TEXT_DOMAIN, "Value too large");
+		break;
+
+	case UU_ERROR_INVALID_CHAR:
+		str = dgettext(TEXT_DOMAIN,
+		    "Value contains unexpected character");
+		break;
+
+	case UU_ERROR_INVALID_DIGIT:
+		str = dgettext(TEXT_DOMAIN,
+		    "Value contains digit not in base");
+		break;
+
+	case UU_ERROR_SYSTEM:
+		str = dgettext(TEXT_DOMAIN, "Underlying system error");
+		break;
+
+	case UU_ERROR_UNKNOWN:
+		str = dgettext(TEXT_DOMAIN, "Error status not known");
+		break;
+
+	default:
+		errno = ESRCH;
+		str = NULL;
+		break;
+	}
+	return (str);
+}
+
+/*
+ * Print a printf-style message to stderr and never return.
+ *
+ * The first thread to get here records itself, the format and a copy of
+ * the arguments in the uu_panic_* statics (under uu_panic_lock) and then
+ * calls abort().  A thread that arrives after another thread has already
+ * been recorded prints its message and then loops in pause() forever.
+ */
+void
+uu_panic(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+
+	(void) pthread_mutex_lock(&uu_panic_lock);
+	if (uu_panic_thread == 0) {
+		uu_panic_thread = pthread_self();
+		uu_panic_format = format;
+		va_copy(uu_panic_args, args);
+	}
+	(void) pthread_mutex_unlock(&uu_panic_lock);
+
+	(void) vfprintf(stderr, format, args);
+
+	if (uu_panic_thread == pthread_self())
+		abort();
+	else
+		for (;;)
+			(void) pause();
+}
+
+/*
+ * Assertion-failure hook: forwards the expression text, file and line to
+ * __assert().
+ */
+int
+assfail(const char *astring, const char *file, int line)
+{
+	__assert(astring, file, line);
+	/*NOTREACHED*/
+	return (0);
+}
+
+/*
+ * pthread_atfork() prepare handler (see uu_init()): acquires
+ * uu_panic_lock, uu_key_lock where it exists, and then calls
+ * uu_avl_lockup() and uu_list_lockup().
+ */
+static void
+uu_lockup(void)
+{
+	(void) pthread_mutex_lock(&uu_panic_lock);
+#if !defined(PTHREAD_ONCE_KEY_NP)
+	(void) pthread_mutex_lock(&uu_key_lock);
+#endif
+	uu_avl_lockup();
+	uu_list_lockup();
+}
+
+/*
+ * pthread_atfork() parent handler: releases everything uu_lockup() took.
+ */
+static void
+uu_release(void)
+{
+	(void) pthread_mutex_unlock(&uu_panic_lock);
+#if !defined(PTHREAD_ONCE_KEY_NP)
+	(void) pthread_mutex_unlock(&uu_key_lock);
+#endif
+	uu_avl_release();
+	uu_list_release();
+}
+
+/*
+ * pthread_atfork() child handler: clears the recorded panic format and
+ * panic thread, then releases the locks exactly as the parent does.
+ */
+static void
+uu_release_child(void)
+{
+	uu_panic_format = NULL;
+	uu_panic_thread = 0;
+
+	uu_release();
+}
+
+/*
+ * Library initializer (named by the init pragma below): registers the
+ * fork handlers above.
+ */
+#pragma init(uu_init)
+static void
+uu_init(void)
+{
+	(void) pthread_atfork(uu_lockup, uu_release, uu_release_child);
+}
+
+/*
+ * Dump a block of memory in hex+ascii, for debugging
+ *
+ * Each output line is `prefix', up to 16 bytes as two-digit hex values,
+ * and then the same bytes as characters, with '.' standing in for
+ * anything isprint() rejects.
+ */
+void
+uu_dump(FILE *out, const char *prefix, const void *buf, size_t len)
+{
+	const unsigned char *p = buf;
+	size_t i;	/* size_t, like len, so large buffers cannot overflow */
+
+	for (i = 0; i < len; i += 16) {
+		size_t j;
+
+		(void) fprintf(out, "%s", prefix);
+		/* i < len here, so len - i cannot wrap */
+		for (j = 0; j < 16 && j < len - i; j++) {
+			(void) fprintf(out, "%2.2x ", p[i + j]);
+		}
+		/*
+		 * Pad a short final row.  Every byte above took three
+		 * columns, so each missing byte needs three as well to keep
+		 * the character column aligned.
+		 */
+		for (; j < 16; j++) {
+			(void) fprintf(out, "   ");
+		}
+		for (j = 0; j < 16 && j < len - i; j++) {
+			(void) fprintf(out, "%c",
+			    isprint(p[i + j]) ? p[i + j] : '.');
+		}
+		(void) fprintf(out, "\n");
+	}
+}
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c
new file mode 100644
index 0000000..7256662
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_open.c
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "libuutil_common.h"
+
+#include <sys/time.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#ifdef _LP64
+#define TMPPATHFMT "%s/uu%ld"
+#else /* _LP64 */
+#define TMPPATHFMT "%s/uu%lld"
+#endif /* _LP64 */
+
+/*
+ * Open an anonymous temporary file in `dir'.
+ *
+ * The file is named "uu" followed by the current gethrtime() value and
+ * is created with O_CREAT | O_EXCL and mode 0600.  If that name already
+ * exists (EEXIST) a new name is tried.  On success the name is unlinked
+ * straight away, so only the returned descriptor refers to the file.
+ *
+ * Returns the open descriptor, or -1 if the path buffer cannot be
+ * allocated or open() fails for any reason other than EEXIST.  `uflags'
+ * is not used.
+ */
+/*ARGSUSED*/
+int
+uu_open_tmp(const char *dir, uint_t uflags)
+{
+	char *path;
+	int fd;
+
+	path = uu_zalloc(PATH_MAX);
+	if (path == NULL)
+		return (-1);
+
+	/* keep generating names for as long as we collide with a file */
+	do {
+		(void) snprintf(path, PATH_MAX, "%s/uu%lld", dir, gethrtime());
+		fd = open(path, O_CREAT | O_EXCL | O_RDWR, 0600);
+	} while (fd < 0 && errno == EEXIST);
+
+	if (fd >= 0)
+		(void) unlink(path);
+
+	uu_free(path);
+
+	return (fd);
+}
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c
new file mode 100644
index 0000000..20626ac
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_pname.c
@@ -0,0 +1,205 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "libuutil_common.h"
+
+#include <libintl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <errno.h>
+#include <wchar.h>
+#include <unistd.h>
+
+/* prefix printed before a message once a program name has been set */
+static const char PNAME_FMT[] = "%s: ";
+/* suffix carrying the strerror() text for messages without a newline */
+static const char ERRNO_FMT[] = ": %s\n";
+
+/* program name recorded by uu_setpname(); NULL until then */
+static const char *pname;
+
+static void
+uu_die_internal(int status, const char *format, va_list alist) __NORETURN;
+
+/* exit statuses handed out by the accessors below; see uu_alt_exit() */
+int uu_exit_ok_value = EXIT_SUCCESS;
+int uu_exit_fatal_value = EXIT_FAILURE;
+int uu_exit_usage_value = 2;
+
+/*
+ * Return the address of the "ok" exit status variable.
+ */
+int *
+uu_exit_ok(void)
+{
+	return (&uu_exit_ok_value);
+}
+
+/*
+ * Return the address of the "fatal" exit status variable.
+ */
+int *
+uu_exit_fatal(void)
+{
+	return (&uu_exit_fatal_value);
+}
+
+/*
+ * Return the address of the "usage" exit status variable.
+ */
+int *
+uu_exit_usage(void)
+{
+	return (&uu_exit_usage_value);
+}
+
+/*
+ * Select a set of exit statuses.  UU_PROFILE_DEFAULT restores
+ * EXIT_SUCCESS / EXIT_FAILURE / 2; UU_PROFILE_LAUNCHER uses
+ * EXIT_SUCCESS / 124 / 125.  Any other value leaves the current
+ * statuses untouched.
+ */
+void
+uu_alt_exit(int profile)
+{
+	switch (profile) {
+	case UU_PROFILE_DEFAULT:
+		uu_exit_ok_value = EXIT_SUCCESS;
+		uu_exit_fatal_value = EXIT_FAILURE;
+		uu_exit_usage_value = 2;
+		break;
+	case UU_PROFILE_LAUNCHER:
+		uu_exit_ok_value = EXIT_SUCCESS;
+		uu_exit_fatal_value = 124;
+		uu_exit_usage_value = 125;
+		break;
+	}
+}
+
+/*
+ * Common back end for the warn and die families.  Writes to stderr the
+ * program name prefix (if one has been set), then the formatted message.
+ * If `format' contains no newline at all, ": <strerror(err)>\n" is
+ * appended.
+ */
+static void
+uu_warn_internal(int err, const char *format, va_list alist)
+{
+	if (pname != NULL)
+		(void) fprintf(stderr, PNAME_FMT, pname);
+
+	(void) vfprintf(stderr, format, alist);
+
+	if (strrchr(format, '\n') == NULL)
+		(void) fprintf(stderr, ERRNO_FMT, strerror(err));
+}
+
+/*
+ * va_list form of uu_warn(); the errno in effect at the call is used.
+ */
+void
+uu_vwarn(const char *format, va_list alist)
+{
+	uu_warn_internal(errno, format, alist);
+}
+
+/*
+ * Print a warning to stderr as described at uu_warn_internal(), using
+ * the errno in effect at the call.
+ */
+/*PRINTFLIKE1*/
+void
+uu_warn(const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_warn_internal(errno, format, alist);
+	va_end(alist);
+}
+
+/*
+ * Print the message exactly as a warning would be printed, then exit
+ * with `status'.  In DEBUG builds, when the process is not
+ * set-id (issetugid() is false) and the UU_DIE_ABORTS environment
+ * variable is set to a non-empty value, abort() is called instead.
+ */
+static void
+uu_die_internal(int status, const char *format, va_list alist)
+{
+	uu_warn_internal(errno, format, alist);
+#ifdef DEBUG
+	{
+		char *cp;
+
+		if (!issetugid()) {
+			cp = getenv("UU_DIE_ABORTS");
+			if (cp != NULL && *cp != '\0')
+				abort();
+		}
+	}
+#endif
+	exit(status);
+}
+
+/*
+ * va_list form of uu_die(): print the message and exit with
+ * UU_EXIT_FATAL.
+ */
+void
+uu_vdie(const char *format, va_list alist)
+{
+	uu_die_internal(UU_EXIT_FATAL, format, alist);
+}
+
+/*
+ * Print the message and exit with UU_EXIT_FATAL.
+ */
+/*PRINTFLIKE1*/
+void
+uu_die(const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_die_internal(UU_EXIT_FATAL, format, alist);
+	/* uu_die_internal() is declared __NORETURN */
+	va_end(alist);
+}
+
+/*
+ * va_list form of uu_xdie(): print the message and exit with `status'.
+ */
+void
+uu_vxdie(int status, const char *format, va_list alist)
+{
+	uu_die_internal(status, format, alist);
+}
+
+/*
+ * Print the message and exit with the caller-supplied `status'.
+ */
+/*PRINTFLIKE2*/
+void
+uu_xdie(int status, const char *format, ...)
+{
+	va_list alist;
+	va_start(alist, format);
+	uu_die_internal(status, format, alist);
+	/* uu_die_internal() is declared __NORETURN */
+	va_end(alist);
+}
+
+/*
+ * Record the program name used to prefix messages and return it.
+ *
+ * The name is the part of `arg0' after its last '/'.  It points into
+ * `arg0' rather than being copied, and trailing '/' characters in `arg0'
+ * are overwritten with NUL bytes on the way.
+ */
+const char *
+uu_setpname(char *arg0)
+{
+	/*
+	 * Having a NULL argv[0], while uncommon, is possible. It
+	 * makes more sense to handle this event in uu_setpname rather
+	 * than in each of its consumers.
+	 */
+	if (arg0 == NULL) {
+		pname = "unknown_command";
+		return (pname);
+	}
+
+	/*
+	 * Guard against '/' at end of command invocation.
+	 */
+	for (;;) {
+		char *p = strrchr(arg0, '/');
+		if (p == NULL) {
+			pname = arg0;
+			break;
+		} else {
+			/* the last '/' ends the string: cut it and retry */
+			if (*(p + 1) == '\0') {
+				*p = '\0';
+				continue;
+			}
+
+			pname = p + 1;
+			break;
+		}
+	}
+
+	return (pname);
+}
+
+/*
+ * Return the name recorded by uu_setpname(), or NULL if none has been
+ * recorded.
+ */
+const char *
+uu_getpname(void)
+{
+	return (pname);
+}
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c
new file mode 100644
index 0000000..66afba0
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * String helper functions
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <ctype.h>
+#include "libuutil.h"
+
+/*
+ * Report whether a and b compare equal under strcmp().
+ */
+boolean_t
+uu_streq(const char *a, const char *b)
+{
+	int order = strcmp(a, b);
+
+	return (order == 0);
+}
+
+/*
+ * Report whether a and b compare equal under strcasecmp(), i.e. without
+ * regard to case.
+ */
+boolean_t
+uu_strcaseeq(const char *a, const char *b)
+{
+	int order = strcasecmp(a, b);
+
+	return (order == 0);
+}
+
+/*
+ * Report whether a begins with b: the first strlen(b) characters of a
+ * are compared against b.
+ */
+boolean_t
+uu_strbw(const char *a, const char *b)
+{
+	size_t prefix_len = strlen(b);
+
+	return (strncmp(a, b, prefix_len) == 0);
+}
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c
new file mode 100644
index 0000000..8fd1148
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_strtoint.c
@@ -0,0 +1,300 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "libuutil_common.h"
+
+#include <limits.h>
+#include <ctype.h>
+
+#define MAX_BASE 36
+
+#define IS_DIGIT(x) ((x) >= '0' && (x) <= '9')
+
+/*
+ * Value of a digit character: '0'-'9' map to 0-9 and 'a'-'z' to 10-35;
+ * everything else is computed as (x) + 10 - 'A', which gives 10-35 for
+ * 'A'-'Z'.  Callers compare the result against the base to reject
+ * characters that are not digits.
+ */
+#define CTOI(x) (((x) >= '0' && (x) <= '9') ? (x) - '0' : \
+	((x) >= 'a' && (x) <= 'z') ? (x) + 10 - 'a' : (x) + 10 - 'A')
+
+/*
+ * Parse the integer in `s_arg' into *out.
+ *
+ * Leading white space, one optional '+' or '-', and (for base 16 or
+ * base 0) a "0x"/"0X" prefix are accepted, as is trailing white space.
+ * Base 0 selects 16 for a "0x" prefix, 8 for a leading '0', else 10.
+ * `sign' is non-zero when the result must fit in an int64_t; when it is
+ * zero a '-' is reported as an underflow.  A negative value is stored in
+ * *out as its two's complement.
+ *
+ * Returns 0 on success.  On failure returns -1, leaves *out alone and
+ * sets one of:
+ *	UU_ERROR_INVALID_ARGUMENT	s_arg is NULL, or base is 1 or > 36
+ *	UU_ERROR_EMPTY			nothing follows the space and sign
+ *	UU_ERROR_INVALID_CHAR		a character that is not a digit
+ *	UU_ERROR_INVALID_DIGIT		a decimal digit too big for the base
+ *	UU_ERROR_UNDERFLOW/OVERFLOW	the value does not fit
+ * When several apply, they are reported in the order listed.
+ */
+static int
+strtoint(const char *s_arg, uint64_t *out, uint32_t base, int sign)
+{
+	const unsigned char *s = (const unsigned char *)s_arg;
+
+	uint64_t val = 0;
+	uint64_t multmax;
+
+	unsigned c, i;
+
+	int neg = 0;
+
+	int bad_digit = 0;
+	int bad_char = 0;
+	int overflow = 0;
+
+	if (s == NULL || base == 1 || base > MAX_BASE) {
+		uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+		return (-1);
+	}
+
+	while ((c = *s) != 0 && isspace(c))
+		s++;
+
+	switch (c) {
+	case '-':
+		if (!sign)
+			overflow = 1;		/* becomes underflow below */
+		neg = 1;
+		/*FALLTHRU*/
+	case '+':
+		c = *++s;
+		break;
+	default:
+		break;
+	}
+
+	if (c == '\0') {
+		uu_set_error(UU_ERROR_EMPTY);
+		return (-1);
+	}
+
+	if (base == 0) {
+		if (c != '0')
+			base = 10;
+		else if (s[1] == 'x' || s[1] == 'X')
+			base = 16;
+		else
+			base = 8;
+	}
+
+	if (base == 16 && c == '0' && (s[1] == 'x' || s[1] == 'X'))
+		c = *(s += 2);
+
+	/*
+	 * A string that ends right after the "0x" prefix leaves s on the
+	 * terminating NUL.  Stop here: the digit loop below starts with
+	 * *++s and would otherwise read past the end of the string.
+	 */
+	if (c == '\0') {
+		uu_set_error(UU_ERROR_INVALID_CHAR);
+		return (-1);
+	}
+
+	if ((val = CTOI(c)) >= base) {
+		if (IS_DIGIT(c))
+			bad_digit = 1;
+		else
+			bad_char = 1;
+		val = 0;
+	}
+
+	/* largest value that can still be multiplied by base safely */
+	multmax = (uint64_t)UINT64_MAX / (uint64_t)base;
+
+	for (c = *++s; c != '\0'; c = *++s) {
+		if ((i = CTOI(c)) >= base) {
+			if (isspace(c))
+				break;
+			if (IS_DIGIT(c))
+				bad_digit = 1;
+			else
+				bad_char = 1;
+			i = 0;
+		}
+
+		if (val > multmax)
+			overflow = 1;
+
+		val *= base;
+		if ((uint64_t)UINT64_MAX - val < (uint64_t)i)
+			overflow = 1;
+
+		val += i;
+	}
+
+	/* only white space may follow the digits */
+	while ((c = *s) != 0) {
+		if (!isspace(c))
+			bad_char = 1;
+		s++;
+	}
+
+	if (sign) {
+		if (neg) {
+			if (val > -(uint64_t)INT64_MIN)
+				overflow = 1;
+		} else {
+			if (val > INT64_MAX)
+				overflow = 1;
+		}
+	}
+
+	if (neg)
+		val = -val;
+
+	if (bad_char | bad_digit | overflow) {
+		if (bad_char)
+			uu_set_error(UU_ERROR_INVALID_CHAR);
+		else if (bad_digit)
+			uu_set_error(UU_ERROR_INVALID_DIGIT);
+		else if (overflow) {
+			if (neg)
+				uu_set_error(UU_ERROR_UNDERFLOW);
+			else
+				uu_set_error(UU_ERROR_OVERFLOW);
+		}
+		return (-1);
+	}
+
+	*out = val;
+	return (0);
+}
+
+/*
+ * Parse a signed integer from `s' and store it in the `sz'-byte signed
+ * object at `v' (sz must be 1, 2, 4 or 8).
+ *
+ * The value must lie in [min, max], and both bounds must be
+ * representable in `sz' bytes.  Passing min == max == 0 selects the full
+ * range of the `sz'-byte type.
+ *
+ * Returns 0 on success.  On failure returns -1 with the error set to
+ * UU_ERROR_INVALID_ARGUMENT (bad sz or bounds), UU_ERROR_UNDERFLOW or
+ * UU_ERROR_OVERFLOW (value outside [min, max]), or whatever strtoint()
+ * set.
+ */
+int
+uu_strtoint(const char *s, void *v, size_t sz, int base,
+    int64_t min, int64_t max)
+{
+	uint64_t val_u;
+	int64_t val;
+
+	if (min > max)
+		goto bad_argument;
+
+	switch (sz) {
+	case 1:
+		if (max > INT8_MAX || min < INT8_MIN)
+			goto bad_argument;
+		break;
+	case 2:
+		if (max > INT16_MAX || min < INT16_MIN)
+			goto bad_argument;
+		break;
+	case 4:
+		if (max > INT32_MAX || min < INT32_MIN)
+			goto bad_argument;
+		break;
+	case 8:
+		if (max > INT64_MAX || min < INT64_MIN)
+			goto bad_argument;
+		break;
+	default:
+		goto bad_argument;
+	}
+
+	/* no explicit bounds: use the whole range of the target type */
+	if (min == 0 && max == 0) {
+		min = -(1ULL << (8 * sz - 1));
+		max = (1ULL << (8 * sz - 1)) - 1;
+	}
+
+	if (strtoint(s, &val_u, base, 1) == -1)
+		return (-1);
+
+	val = (int64_t)val_u;
+
+	if (val < min) {
+		uu_set_error(UU_ERROR_UNDERFLOW);
+		return (-1);
+	} else if (val > max) {
+		uu_set_error(UU_ERROR_OVERFLOW);
+		return (-1);
+	}
+
+	switch (sz) {
+	case 1:
+		*(int8_t *)v = val;
+		return (0);
+	case 2:
+		*(int16_t *)v = val;
+		return (0);
+	case 4:
+		*(int32_t *)v = val;
+		return (0);
+	case 8:
+		*(int64_t *)v = val;
+		return (0);
+	default:
+		break;		/* fall through to bad_argument */
+	}
+
+bad_argument:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
+
+/*
+ * Unsigned counterpart of uu_strtoint(): parse an unsigned integer from
+ * `s' and store it in the `sz'-byte unsigned object at `v' (sz must be
+ * 1, 2, 4 or 8).
+ *
+ * The value must lie in [min, max], and max must be representable in
+ * `sz' bytes.  Passing min == max == 0 selects the full range of the
+ * `sz'-byte type.  A leading '-' is reported as UU_ERROR_UNDERFLOW.
+ *
+ * Returns 0 on success, or -1 with the error set as for uu_strtoint().
+ */
+int
+uu_strtouint(const char *s, void *v, size_t sz, int base,
+    uint64_t min, uint64_t max)
+{
+	uint64_t val;
+
+	if (min > max)
+		goto bad_argument;
+
+	switch (sz) {
+	case 1:
+		if (max > UINT8_MAX)
+			goto bad_argument;
+		break;
+	case 2:
+		if (max > UINT16_MAX)
+			goto bad_argument;
+		break;
+	case 4:
+		if (max > UINT32_MAX)
+			goto bad_argument;
+		break;
+	case 8:
+		if (max > UINT64_MAX)
+			goto bad_argument;
+		break;
+	default:
+		goto bad_argument;
+	}
+
+	if (min == 0 && max == 0) {
+		/* we have to be careful, since << can overflow */
+		max = (1ULL << (8 * sz - 1)) * 2 - 1;
+	}
+
+	if (strtoint(s, &val, base, 0) == -1)
+		return (-1);
+
+	if (val < min) {
+		uu_set_error(UU_ERROR_UNDERFLOW);
+		return (-1);
+	} else if (val > max) {
+		uu_set_error(UU_ERROR_OVERFLOW);
+		return (-1);
+	}
+
+	switch (sz) {
+	case 1:
+		*(uint8_t *)v = val;
+		return (0);
+	case 2:
+		*(uint16_t *)v = val;
+		return (0);
+	case 4:
+		*(uint32_t *)v = val;
+		return (0);
+	case 8:
+		*(uint64_t *)v = val;
+		return (0);
+	default:
+		break;		/* shouldn't happen, fall through */
+	}
+
+bad_argument:
+	uu_set_error(UU_ERROR_INVALID_ARGUMENT);
+	return (-1);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
new file mode 100644
index 0000000..8a707d1
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@@ -0,0 +1,801 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
+ * All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + */ + +#ifndef _LIBZFS_H +#define _LIBZFS_H + +#include <assert.h> +#include <libnvpair.h> +#include <sys/mnttab.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/varargs.h> +#include <sys/fs/zfs.h> +#include <sys/avl.h> +#include <sys/zfs_ioctl.h> +#include <libzfs_core.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Miscellaneous ZFS constants + */ +#define ZFS_MAXNAMELEN MAXNAMELEN +#define ZPOOL_MAXNAMELEN MAXNAMELEN +#define ZFS_MAXPROPLEN MAXPATHLEN +#define ZPOOL_MAXPROPLEN MAXPATHLEN + +/* + * libzfs errors + */ +typedef enum zfs_error { + EZFS_SUCCESS = 0, /* no error -- success */ + EZFS_NOMEM = 2000, /* out of memory */ + EZFS_BADPROP, /* invalid property value */ + EZFS_PROPREADONLY, /* cannot set readonly property */ + EZFS_PROPTYPE, /* property does not apply to dataset type */ + EZFS_PROPNONINHERIT, /* property is not inheritable */ + EZFS_PROPSPACE, /* bad quota or reservation */ + EZFS_BADTYPE, /* dataset is not of appropriate type */ + EZFS_BUSY, /* pool or dataset is busy */ + EZFS_EXISTS, /* pool or dataset already exists */ + EZFS_NOENT, /* no such pool or dataset */ + EZFS_BADSTREAM, /* bad backup stream */ + EZFS_DSREADONLY, /* dataset is readonly */ + EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ + EZFS_INVALIDNAME, /* invalid dataset name */ + EZFS_BADRESTORE, /* unable to restore to destination */ + EZFS_BADBACKUP, /* backup failed */ + EZFS_BADTARGET, /* bad attach/detach/replace target */ + EZFS_NODEVICE, /* no such device in pool */ + EZFS_BADDEV, /* invalid device to add */ + EZFS_NOREPLICAS, /* no valid replicas */ + EZFS_RESILVERING, /* currently resilvering */ + EZFS_BADVERSION, /* unsupported version */ + EZFS_POOLUNAVAIL, /* pool is 
currently unavailable */ + EZFS_DEVOVERFLOW, /* too many devices in one vdev */ + EZFS_BADPATH, /* must be an absolute path */ + EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ + EZFS_ZONED, /* used improperly in local zone */ + EZFS_MOUNTFAILED, /* failed to mount dataset */ + EZFS_UMOUNTFAILED, /* failed to unmount dataset */ + EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ + EZFS_SHARENFSFAILED, /* share(1M) failed */ + EZFS_PERM, /* permission denied */ + EZFS_NOSPC, /* out of space */ + EZFS_FAULT, /* bad address */ + EZFS_IO, /* I/O error */ + EZFS_INTR, /* signal received */ + EZFS_ISSPARE, /* device is a hot spare */ + EZFS_INVALCONFIG, /* invalid vdev configuration */ + EZFS_RECURSIVE, /* recursive dependency */ + EZFS_NOHISTORY, /* no history object */ + EZFS_POOLPROPS, /* couldn't retrieve pool props */ + EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ + EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ + EZFS_NAMETOOLONG, /* dataset name is too long */ + EZFS_OPENFAILED, /* open of device failed */ + EZFS_NOCAP, /* couldn't get capacity */ + EZFS_LABELFAILED, /* write of label failed */ + EZFS_BADWHO, /* invalid permission who */ + EZFS_BADPERM, /* invalid permission */ + EZFS_BADPERMSET, /* invalid permission set name */ + EZFS_NODELEGATION, /* delegated administration is disabled */ + EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ + EZFS_SHARESMBFAILED, /* failed to share over smb */ + EZFS_BADCACHE, /* bad cache file */ + EZFS_ISL2CACHE, /* device is for the level 2 ARC */ + EZFS_VDEVNOTSUP, /* unsupported vdev type */ + EZFS_NOTSUP, /* ops not supported on this dataset */ + EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ + EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ + EZFS_REFTAG_RELE, /* snapshot release: tag not found */ + EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ + EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ + EZFS_PIPEFAILED, /* pipe 
create failed */ + EZFS_THREADCREATEFAILED, /* thread create failed */ + EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ + EZFS_SCRUBBING, /* currently scrubbing */ + EZFS_NO_SCRUB, /* no active scrub */ + EZFS_DIFF, /* general failure of zfs diff */ + EZFS_DIFFDATA, /* bad zfs diff data */ + EZFS_POOLREADONLY, /* pool is in read-only mode */ + EZFS_UNKNOWN +} zfs_error_t; + +/* + * The following data structures are all part + * of the zfs_allow_t data structure which is + * used for printing 'allow' permissions. + * It is a linked list of zfs_allow_t's which + * then contain avl tree's for user/group/sets/... + * and each one of the entries in those trees have + * avl tree's for the permissions they belong to and + * whether they are local,descendent or local+descendent + * permissions. The AVL trees are used primarily for + * sorting purposes, but also so that we can quickly find + * a given user and or permission. + */ +typedef struct zfs_perm_node { + avl_node_t z_node; + char z_pname[MAXPATHLEN]; +} zfs_perm_node_t; + +typedef struct zfs_allow_node { + avl_node_t z_node; + char z_key[MAXPATHLEN]; /* name, such as joe */ + avl_tree_t z_localdescend; /* local+descendent perms */ + avl_tree_t z_local; /* local permissions */ + avl_tree_t z_descend; /* descendent permissions */ +} zfs_allow_node_t; + +typedef struct zfs_allow { + struct zfs_allow *z_next; + char z_setpoint[MAXPATHLEN]; + avl_tree_t z_sets; + avl_tree_t z_crperms; + avl_tree_t z_user; + avl_tree_t z_group; + avl_tree_t z_everyone; +} zfs_allow_t; + +/* + * Basic handle types + */ +typedef struct zfs_handle zfs_handle_t; +typedef struct zpool_handle zpool_handle_t; +typedef struct libzfs_handle libzfs_handle_t; + +/* + * Library initialization + */ +extern libzfs_handle_t *libzfs_init(void); +extern void libzfs_fini(libzfs_handle_t *); + +extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); +extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); + +extern void 
libzfs_print_on_error(libzfs_handle_t *, boolean_t); + +extern void zfs_save_arguments(int argc, char **, char *, int); +extern int zpool_log_history(libzfs_handle_t *, const char *); + +extern int libzfs_errno(libzfs_handle_t *); +extern const char *libzfs_error_action(libzfs_handle_t *); +extern const char *libzfs_error_description(libzfs_handle_t *); +extern int zfs_standard_error(libzfs_handle_t *, int, const char *); +extern void libzfs_mnttab_init(libzfs_handle_t *); +extern void libzfs_mnttab_fini(libzfs_handle_t *); +extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); +extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, + struct mnttab *); +extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, + const char *, const char *); +extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); + +/* + * Basic handle functions + */ +extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); +extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); +extern void zpool_close(zpool_handle_t *); +extern const char *zpool_get_name(zpool_handle_t *); +extern int zpool_get_state(zpool_handle_t *); +extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); +extern const char *zpool_pool_state_to_name(pool_state_t); +extern void zpool_free_handles(libzfs_handle_t *); + +/* + * Iterate over all active pools in the system. 
+ */ +typedef int (*zpool_iter_f)(zpool_handle_t *, void *); +extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); + +/* + * Functions to create and destroy pools + */ +extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, + nvlist_t *, nvlist_t *); +extern int zpool_destroy(zpool_handle_t *, const char *); +extern int zpool_add(zpool_handle_t *, nvlist_t *); + +typedef struct splitflags { + /* do not split, but return the config that would be split off */ + int dryrun : 1; + + /* after splitting, import the pool */ + int import : 1; +} splitflags_t; + +/* + * Functions to manipulate pool and vdev state + */ +extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); +extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); +extern int zpool_reguid(zpool_handle_t *); +extern int zpool_reopen(zpool_handle_t *); + +extern int zpool_vdev_online(zpool_handle_t *, const char *, int, + vdev_state_t *); +extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); +extern int zpool_vdev_attach(zpool_handle_t *, const char *, + const char *, nvlist_t *, int); +extern int zpool_vdev_detach(zpool_handle_t *, const char *); +extern int zpool_vdev_remove(zpool_handle_t *, const char *); +extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, + splitflags_t); + +extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); +extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); +extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); + +extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, + boolean_t *, boolean_t *); +extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, + boolean_t *, boolean_t *, boolean_t *); +extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *); + +/* + * Functions to manage pool properties + */ +extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); +extern int 
zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, + size_t proplen, zprop_source_t *, boolean_t); +extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, + zprop_source_t *); + +extern const char *zpool_prop_to_name(zpool_prop_t); +extern const char *zpool_prop_values(zpool_prop_t); + +/* + * Pool health statistics. + */ +typedef enum { + /* + * The following correspond to faults as defined in the (fault.fs.zfs.*) + * event namespace. Each is associated with a corresponding message ID. + */ + ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ + ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ + ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ + ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ + ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ + ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ + ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ + ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ + ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ + ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ + ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ + ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ + ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ + ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ + + /* + * If the pool has unsupported features but can still be opened in + * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the + * pool has unsupported features but cannot be opened at all, its + * status is ZPOOL_STATUS_UNSUP_FEAT_READ. + */ + ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ + ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ + + /* + * These faults have no corresponding message ID. 
At the time we are + * checking the status, the original reason for the FMA fault (I/O or + * checksum errors) has been lost. + */ + ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ + ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ + + /* + * The following are not faults per se, but still an error possibly + * requiring administrative attention. There is no corresponding + * message ID. + */ + ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ + ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ + ZPOOL_STATUS_RESILVERING, /* device being resilvered */ + ZPOOL_STATUS_OFFLINE_DEV, /* device offline */ + ZPOOL_STATUS_REMOVED_DEV, /* removed device */ + ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ + + /* + * Finally, the following indicates a healthy pool. + */ + ZPOOL_STATUS_OK +} zpool_status_t; + +extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); +extern zpool_status_t zpool_import_status(nvlist_t *, char **); +extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); + +/* + * Statistics and configuration functions.
+ */ +extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); +extern nvlist_t *zpool_get_features(zpool_handle_t *); +extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); +extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); + +/* + * Import and export functions + */ +extern int zpool_export(zpool_handle_t *, boolean_t, const char *); +extern int zpool_export_force(zpool_handle_t *, const char *); +extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, + char *altroot); +extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, + nvlist_t *, int); +extern void zpool_print_unsup_feat(nvlist_t *config); + +/* + * Search for pools to import + */ + +typedef struct importargs { + char **path; /* a list of paths to search */ + int paths; /* number of paths to search */ + char *poolname; /* name of a pool to find */ + uint64_t guid; /* guid of a pool to find */ + char *cachefile; /* cachefile to use for import */ + int can_be_active : 1; /* can the pool be active? */ + int unique : 1; /* does 'poolname' already exist? 
*/ + int exists : 1; /* set on return if pool already exists */ +} importargs_t; + +extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); + +/* legacy pool search routines */ +extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); +extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, + char *, uint64_t); + +/* + * Miscellaneous pool functions + */ +struct zfs_cmd; + +extern const char *zfs_history_event_names[]; + +extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, + boolean_t verbose); +extern int zpool_upgrade(zpool_handle_t *, uint64_t); +extern int zpool_get_history(zpool_handle_t *, nvlist_t **); +extern int zpool_history_unpack(char *, uint64_t, uint64_t *, + nvlist_t ***, uint_t *); +extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, + size_t len); +extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *); +extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); +extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, + nvlist_t *); + +/* + * Basic handle manipulations. These functions do not create or destroy the + * underlying datasets, only the references to them. + */ +extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); +extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); +extern void zfs_close(zfs_handle_t *); +extern zfs_type_t zfs_get_type(const zfs_handle_t *); +extern const char *zfs_get_name(const zfs_handle_t *); +extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); + +/* + * Property management functions. Some functions are shared with the kernel, + * and are found in sys/fs/zfs.h. 
+ */ + +/* + * zfs dataset property management + */ +extern const char *zfs_prop_default_string(zfs_prop_t); +extern uint64_t zfs_prop_default_numeric(zfs_prop_t); +extern const char *zfs_prop_column_name(zfs_prop_t); +extern boolean_t zfs_prop_align_right(zfs_prop_t); + +extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, + nvlist_t *, uint64_t, zfs_handle_t *, const char *); + +extern const char *zfs_prop_to_name(zfs_prop_t); +extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); +extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, + zprop_source_t *, char *, size_t, boolean_t); +extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, + boolean_t); +extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, + zprop_source_t *, char *, size_t); +extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue); +extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal); +extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue); +extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal); +extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, + char *buf, size_t len); +extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); +extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); +extern const char *zfs_prop_values(zfs_prop_t); +extern int zfs_prop_is_string(zfs_prop_t prop); +extern nvlist_t *zfs_get_user_props(zfs_handle_t *); +extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); +extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); + + +typedef struct zprop_list { + int pl_prop; + char *pl_user_prop; + struct zprop_list *pl_next; + boolean_t pl_all; + size_t pl_width; + size_t pl_recvd_width; + boolean_t pl_fixed; +} zprop_list_t; + +extern 
int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, + boolean_t); +extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); + +#define ZFS_MOUNTPOINT_NONE "none" +#define ZFS_MOUNTPOINT_LEGACY "legacy" + +#define ZFS_FEATURE_DISABLED "disabled" +#define ZFS_FEATURE_ENABLED "enabled" +#define ZFS_FEATURE_ACTIVE "active" + +#define ZFS_UNSUPPORTED_INACTIVE "inactive" +#define ZFS_UNSUPPORTED_READONLY "readonly" + +/* + * zpool property management + */ +extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); +extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, + size_t); +extern const char *zpool_prop_default_string(zpool_prop_t); +extern uint64_t zpool_prop_default_numeric(zpool_prop_t); +extern const char *zpool_prop_column_name(zpool_prop_t); +extern boolean_t zpool_prop_align_right(zpool_prop_t); + +/* + * Functions shared by zfs and zpool property management. + */ +extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, + boolean_t ordered, zfs_type_t type); +extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, + zfs_type_t); +extern void zprop_free_list(zprop_list_t *); + +#define ZFS_GET_NCOLS 5 + +typedef enum { + GET_COL_NONE, + GET_COL_NAME, + GET_COL_PROPERTY, + GET_COL_VALUE, + GET_COL_RECVD, + GET_COL_SOURCE +} zfs_get_column_t; + +/* + * Functions for printing zfs or zpool properties + */ +typedef struct zprop_get_cbdata { + int cb_sources; + zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; + int cb_colwidths[ZFS_GET_NCOLS + 1]; + boolean_t cb_scripted; + boolean_t cb_literal; + boolean_t cb_first; + zprop_list_t *cb_proplist; + zfs_type_t cb_type; +} zprop_get_cbdata_t; + +void zprop_print_one_property(const char *, zprop_get_cbdata_t *, + const char *, const char *, zprop_source_t, const char *, + const char *); + +/* + * Iterator functions. 
+ */ +typedef int (*zfs_iter_f)(zfs_handle_t *, void *); +extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); +extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *); +extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); +extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); + +typedef struct get_all_cb { + zfs_handle_t **cb_handles; + size_t cb_alloc; + size_t cb_used; + boolean_t cb_verbose; + int (*cb_getone)(zfs_handle_t *, void *); +} get_all_cb_t; + +void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); +int libzfs_dataset_cmp(const void *, const void *); + +/* + * Functions to create and destroy datasets. + */ +extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, + nvlist_t *); +extern int zfs_create_ancestors(libzfs_handle_t *, const char *); +extern int zfs_destroy(zfs_handle_t *, boolean_t); +extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); +extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); +extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); +extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); +extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, + nvlist_t *props); +extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); + +typedef struct renameflags { + /* recursive rename */ + int recurse : 1; + + /* don't unmount file systems */ + int nounmount : 1; + + /* force unmount file systems */ + int forceunmount : 1; +} renameflags_t; + +extern int zfs_rename(zfs_handle_t *, const char *, const char *, + renameflags_t flags); + +typedef struct sendflags { + /* print 
informational messages (ie, -v was specified) */ + boolean_t verbose; + + /* recursive send (ie, -R) */ + boolean_t replicate; + + /* for incrementals, do all intermediate snapshots */ + boolean_t doall; + + /* if dataset is a clone, do incremental from its origin */ + boolean_t fromorigin; + + /* do deduplication */ + boolean_t dedup; + + /* send properties (ie, -p) */ + boolean_t props; + + /* do not send (no-op, ie. -n) */ + boolean_t dryrun; + + /* parsable verbose output (ie. -P) */ + boolean_t parsable; + + /* show progress (ie. -v) */ + boolean_t progress; + + /* large blocks (>128K) are permitted */ + boolean_t largeblock; + + /* WRITE_EMBEDDED records of type DATA are permitted */ + boolean_t embed_data; +} sendflags_t; + +typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); + +extern int zfs_send(zfs_handle_t *, const char *, const char *, + sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); +extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags); + +extern int zfs_promote(zfs_handle_t *); +extern int zfs_hold(zfs_handle_t *, const char *, const char *, + boolean_t, int); +extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); +extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); +extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); +extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); + +typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, + uid_t rid, uint64_t space); + +extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, + zfs_userspace_cb_t, void *); + +extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); +extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); + +typedef struct recvflags { + /* print informational messages (ie, -v was specified) */ + boolean_t verbose; + + /* the destination is a prefix, not the exact fs (ie, -d) */ + boolean_t isprefix; + + /* + * Only the tail of the sent snapshot path is appended to the + * 
destination to determine the received snapshot name (ie, -e). + */ + boolean_t istail; + + /* do not actually do the recv, just check if it would work (ie, -n) */ + boolean_t dryrun; + + /* rollback/destroy filesystems as necessary (eg, -F) */ + boolean_t force; + + /* set "canmount=off" on all modified filesystems */ + boolean_t canmountoff; + + /* byteswap flag is used internally; callers need not specify */ + boolean_t byteswap; + + /* do not mount file systems as they are extracted (private) */ + boolean_t nomount; +} recvflags_t; + +extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, + int, avl_tree_t *); + +typedef enum diff_flags { + ZFS_DIFF_PARSEABLE = 0x1, + ZFS_DIFF_TIMESTAMP = 0x2, + ZFS_DIFF_CLASSIFY = 0x4 +} diff_flags_t; + +extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, + int); + +/* + * Miscellaneous functions. + */ +extern const char *zfs_type_to_name(zfs_type_t); +extern void zfs_refresh_properties(zfs_handle_t *); +extern int zfs_name_valid(const char *, zfs_type_t); +extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); +extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, + zfs_type_t); +extern int zfs_spa_version(zfs_handle_t *, int *); +extern boolean_t zfs_bookmark_exists(const char *path); + +/* + * Mount support functions. + */ +extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); +extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); +extern int zfs_mount(zfs_handle_t *, const char *, int); +extern int zfs_unmount(zfs_handle_t *, const char *, int); +extern int zfs_unmountall(zfs_handle_t *, int); + +/* + * Share support functions. + */ +extern boolean_t zfs_is_shared(zfs_handle_t *); +extern int zfs_share(zfs_handle_t *); +extern int zfs_unshare(zfs_handle_t *); + +/* + * Protocol-specific share support functions. 
+ */ +extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); +extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); +extern int zfs_share_nfs(zfs_handle_t *); +extern int zfs_share_smb(zfs_handle_t *); +extern int zfs_shareall(zfs_handle_t *); +extern int zfs_unshare_nfs(zfs_handle_t *, const char *); +extern int zfs_unshare_smb(zfs_handle_t *, const char *); +extern int zfs_unshareall_nfs(zfs_handle_t *); +extern int zfs_unshareall_smb(zfs_handle_t *); +extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); +extern int zfs_unshareall(zfs_handle_t *); +extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, + void *, void *, int, zfs_share_op_t); + +/* + * FreeBSD-specific jail support function. + */ +extern int zfs_jail(zfs_handle_t *, int, int); + +/* + * When dealing with nvlists, verify() is extremely useful + */ +#ifndef verify +#ifdef NDEBUG +#define verify(EX) ((void)(EX)) +#else +#define verify(EX) assert(EX) +#endif +#endif + +/* + * Utility function to convert a number to a human-readable form. + */ +extern void zfs_nicenum(uint64_t, char *, size_t); +extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); + +/* + * Given a device or file, determine if it is part of a pool. + */ +extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, + boolean_t *); + +/* + * Label manipulation. + */ +extern int zpool_read_label(int, nvlist_t **); +extern int zpool_clear_label(int); + +/* is this zvol valid for use as a dump device? 
*/ +extern int zvol_check_dump_config(char *); + +/* + * Management interfaces for SMB ACL files + */ + +int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); +int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); +int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); +int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); + +/* + * Enable and disable datasets within a pool by mounting/unmounting and + * sharing/unsharing them. + */ +extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); +extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); + +/* + * Mappings between vdev and FRU. + */ +extern void libzfs_fru_refresh(libzfs_handle_t *); +extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); +extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); +extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, + const char *); +extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); +extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); + +#ifndef sun +extern int zmount(const char *, const char *, int, char *, char *, int, char *, + int); +#endif /* !sun */ + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFS_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c new file mode 100644 index 0000000..a899965 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c @@ -0,0 +1,700 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Portions Copyright 2007 Ramprakash Jelari + * + * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. + * All rights reserved. + */ + +#include <libintl.h> +#include <libuutil.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <zone.h> + +#include <libzfs.h> + +#include "libzfs_impl.h" + +/* + * Structure to keep track of dataset state. Before changing the 'sharenfs' or + * 'mountpoint' property, we record whether the filesystem was previously + * mounted/shared. This prior state dictates whether we remount/reshare the + * dataset after the property has been changed. + * + * The interface consists of the following sequence of functions: + * + * changelist_gather() + * changelist_prefix() + * < change property > + * changelist_postfix() + * changelist_free() + * + * Other interfaces: + * + * changelist_remove() - remove a node from a gathered list + * changelist_rename() - renames all datasets appropriately when doing a rename + * changelist_unshare() - unshares all the nodes in a given changelist + * changelist_haszonedchild() - check if there is any child exported to + * a local zone + */ +typedef struct prop_changenode { + zfs_handle_t *cn_handle; + int cn_shared; + int cn_mounted; + int cn_zoned; + boolean_t cn_needpost; /* is postfix() needed? 
*/ + uu_list_node_t cn_listnode; +} prop_changenode_t; + +struct prop_changelist { + zfs_prop_t cl_prop; + zfs_prop_t cl_realprop; + zfs_prop_t cl_shareprop; /* used with sharenfs/sharesmb */ + uu_list_pool_t *cl_pool; + uu_list_t *cl_list; + boolean_t cl_waslegacy; + boolean_t cl_allchildren; + boolean_t cl_alldependents; + int cl_mflags; /* Mount flags */ + int cl_gflags; /* Gather request flags */ + boolean_t cl_haszonedchild; + boolean_t cl_sorted; +}; + +/* + * If the property is 'mountpoint', go through and unmount filesystems as + * necessary. We don't do the same for 'sharenfs', because we can just re-share + * with different options without interrupting service. We do handle 'sharesmb' + * since there may be old resource names that need to be removed. + */ +int +changelist_prefix(prop_changelist_t *clp) +{ + prop_changenode_t *cn; + int ret = 0; + + if (clp->cl_prop != ZFS_PROP_MOUNTPOINT && + clp->cl_prop != ZFS_PROP_SHARESMB) + return (0); + + for (cn = uu_list_first(clp->cl_list); cn != NULL; + cn = uu_list_next(clp->cl_list, cn)) { + + /* if a previous loop failed, set the remaining to false */ + if (ret == -1) { + cn->cn_needpost = B_FALSE; + continue; + } + + /* + * If we are in the global zone, but this dataset is exported + * to a local zone, do nothing. + */ + if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned) + continue; + + if (!ZFS_IS_VOLUME(cn->cn_handle)) { + /* + * Do the property specific processing. + */ + switch (clp->cl_prop) { + case ZFS_PROP_MOUNTPOINT: + if (clp->cl_gflags & CL_GATHER_DONT_UNMOUNT) + break; + if (zfs_unmount(cn->cn_handle, NULL, + clp->cl_mflags) != 0) { + ret = -1; + cn->cn_needpost = B_FALSE; + } + break; + case ZFS_PROP_SHARESMB: + (void) zfs_unshare_smb(cn->cn_handle, NULL); + break; + } + } + } + + if (ret == -1) + (void) changelist_postfix(clp); + + return (ret); +} + +/* + * If the property is 'mountpoint' or 'sharenfs', go through and remount and/or + * reshare the filesystems as necessary. 
In changelist_gather() we recorded
+ * whether the filesystem was previously shared or mounted.  The action we take
+ * depends on the previous state, and whether the value was previously 'legacy'.
+ * For non-legacy properties, we only remount/reshare the filesystem if it was
+ * previously mounted/shared.  Otherwise, we always remount/reshare the
+ * filesystem.
+ *
+ * Returns 0 if the changelist is empty or every mount/share/unshare attempted
+ * here succeeded, and -1 if at least one of them failed.  A failure on one
+ * dataset does not stop the walk over the remaining datasets.
+ */
+int
+changelist_postfix(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	char shareopts[ZFS_MAXPROPLEN];
+	int errors = 0;
+	libzfs_handle_t *hdl;
+
+	/*
+	 * If we're changing the mountpoint, attempt to destroy the underlying
+	 * mountpoint.  All other datasets will have inherited from this dataset
+	 * (in which case their mountpoints exist in the filesystem in the new
+	 * location), or have explicit mountpoints set (in which case they won't
+	 * be in the changelist).
+	 */
+	if ((cn = uu_list_last(clp->cl_list)) == NULL)
+		return (0);
+
+	if (clp->cl_prop == ZFS_PROP_MOUNTPOINT &&
+	    !(clp->cl_gflags & CL_GATHER_DONT_UNMOUNT)) {
+		remove_mountpoint(cn->cn_handle);
+	}
+
+	/*
+	 * It is possible that the changelist_prefix() used libshare
+	 * to unshare some entries. Since libshare caches data, an
+	 * attempt to reshare during postfix can fail unless libshare
+	 * is uninitialized here so that it will reinitialize later.
+	 */
+	if (cn->cn_handle != NULL) {
+		hdl = cn->cn_handle->zfs_hdl;
+		assert(hdl != NULL);
+		zfs_uninit_libshare(hdl);
+	}
+
+	/*
+	 * We walk the datasets in reverse, because we want to mount any parent
+	 * datasets before mounting the children.  We walk all datasets even if
+	 * there are errors.
+	 */
+	for (cn = uu_list_last(clp->cl_list); cn != NULL;
+	    cn = uu_list_prev(clp->cl_list, cn)) {
+
+		boolean_t sharenfs;
+		boolean_t sharesmb;
+		boolean_t mounted;
+
+		/*
+		 * If we are in the global zone, but this dataset is exported
+		 * to a local zone, do nothing.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			continue;
+
+		/*
+		 * Only do post-processing if it's required.  The flag is
+		 * cleared first so that a second postfix pass over the same
+		 * list skips this node.
+		 */
+		if (!cn->cn_needpost)
+			continue;
+		cn->cn_needpost = B_FALSE;
+
+		zfs_refresh_properties(cn->cn_handle);
+
+		if (ZFS_IS_VOLUME(cn->cn_handle))
+			continue;
+
+		/*
+		 * Remount if previously mounted or mountpoint was legacy,
+		 * or sharenfs or sharesmb property is set.  shareopts is
+		 * only a scratch buffer; each lookup overwrites it.
+		 */
+		sharenfs = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		sharesmb = ((zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARESMB,
+		    shareopts, sizeof (shareopts), NULL, NULL, 0,
+		    B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+
+		mounted = (clp->cl_gflags & CL_GATHER_DONT_UNMOUNT) ||
+		    zfs_is_mounted(cn->cn_handle, NULL);
+
+		if (!mounted && (cn->cn_mounted ||
+		    ((sharenfs || sharesmb || clp->cl_waslegacy) &&
+		    (zfs_prop_get_int(cn->cn_handle,
+		    ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {
+
+			if (zfs_mount(cn->cn_handle, NULL, 0) != 0)
+				errors++;
+			else
+				mounted = B_TRUE;
+		}
+
+		/*
+		 * If the file system is mounted we always re-share even
+		 * if the filesystem is currently shared, so that we can
+		 * adopt any new options.
+		 *
+		 * Each failure is counted as exactly one error.  Adding the
+		 * helpers' return values directly would let a negative
+		 * status cancel the positive count from a failed mount and
+		 * make the function report success.
+		 */
+		if (sharenfs && mounted) {
+			if (zfs_share_nfs(cn->cn_handle) != 0)
+				errors++;
+		} else if (cn->cn_shared || clp->cl_waslegacy) {
+			if (zfs_unshare_nfs(cn->cn_handle, NULL) != 0)
+				errors++;
+		}
+		if (sharesmb && mounted) {
+			if (zfs_share_smb(cn->cn_handle) != 0)
+				errors++;
+		} else if (cn->cn_shared || clp->cl_waslegacy) {
+			if (zfs_unshare_smb(cn->cn_handle, NULL) != 0)
+				errors++;
+		}
+	}
+
+	return (errors ? -1 : 0);
+}
+
+/*
+ * Is this "dataset" a child of "parent"?
+ */ +boolean_t +isa_child_of(const char *dataset, const char *parent) +{ + int len; + + len = strlen(parent); + + if (strncmp(dataset, parent, len) == 0 && + (dataset[len] == '@' || dataset[len] == '/' || + dataset[len] == '\0')) + return (B_TRUE); + else + return (B_FALSE); + +} + +/* + * If we rename a filesystem, child filesystem handles are no longer valid + * since we identify each dataset by its name in the ZFS namespace. As a + * result, we have to go through and fix up all the names appropriately. We + * could do this automatically if libzfs kept track of all open handles, but + * this is a lot less work. + */ +void +changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) +{ + prop_changenode_t *cn; + char newname[ZFS_MAXNAMELEN]; + + for (cn = uu_list_first(clp->cl_list); cn != NULL; + cn = uu_list_next(clp->cl_list, cn)) { + /* + * Do not rename a clone that's not in the source hierarchy. + */ + if (!isa_child_of(cn->cn_handle->zfs_name, src)) + continue; + + /* + * Destroy the previous mountpoint if needed. + */ + remove_mountpoint(cn->cn_handle); + + (void) strlcpy(newname, dst, sizeof (newname)); + (void) strcat(newname, cn->cn_handle->zfs_name + strlen(src)); + + (void) strlcpy(cn->cn_handle->zfs_name, newname, + sizeof (cn->cn_handle->zfs_name)); + } +} + +/* + * Given a gathered changelist for the 'sharenfs' or 'sharesmb' property, + * unshare all the datasets in the list. + */ +int +changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) +{ + prop_changenode_t *cn; + int ret = 0; + + if (clp->cl_prop != ZFS_PROP_SHARENFS && + clp->cl_prop != ZFS_PROP_SHARESMB) + return (0); + + for (cn = uu_list_first(clp->cl_list); cn != NULL; + cn = uu_list_next(clp->cl_list, cn)) { + if (zfs_unshare_proto(cn->cn_handle, NULL, proto) != 0) + ret = -1; + } + + return (ret); +} + +/* + * Check if there is any child exported to a local zone in a given changelist. 
+ * This information has already been recorded while gathering the changelist
+ * via changelist_gather().
+ */
+int
+changelist_haszonedchild(prop_changelist_t *clp)
+{
+	return (clp->cl_haszonedchild);
+}
+
+/*
+ * Remove a node from a gathered list.
+ *
+ * Only the first node whose dataset name equals "name" is removed; its
+ * handle is closed and the node is freed.  Nothing happens if no node matches.
+ */
+void
+changelist_remove(prop_changelist_t *clp, const char *name)
+{
+	prop_changenode_t *cn;
+
+	for (cn = uu_list_first(clp->cl_list); cn != NULL;
+	    cn = uu_list_next(clp->cl_list, cn)) {
+
+		if (strcmp(cn->cn_handle->zfs_name, name) == 0) {
+			uu_list_remove(clp->cl_list, cn);
+			zfs_close(cn->cn_handle);
+			free(cn);
+			return;
+		}
+	}
+}
+
+/*
+ * Release any memory associated with a changelist.
+ *
+ * Closes the handle of every node, frees the nodes, destroys the list and
+ * its pool (either may be absent) and finally frees "clp" itself.
+ */
+void
+changelist_free(prop_changelist_t *clp)
+{
+	prop_changenode_t *cn;
+	void *cookie;
+
+	if (clp->cl_list) {
+		cookie = NULL;
+		while ((cn = uu_list_teardown(clp->cl_list, &cookie)) != NULL) {
+			zfs_close(cn->cn_handle);
+			free(cn);
+		}
+
+		uu_list_destroy(clp->cl_list);
+	}
+	if (clp->cl_pool)
+		uu_list_pool_destroy(clp->cl_pool);
+
+	free(clp);
+}
+
+static int
+change_one(zfs_handle_t *zhp, void *data)
+{
+	prop_changelist_t *clp = data;
+	char property[ZFS_MAXPROPLEN];
+	char where[64];
+	prop_changenode_t *cn;
+	zprop_source_t sourcetype;
+	zprop_source_t share_sourcetype;
+
+	/*
+	 * Iterator callback passed to zfs_iter_children() and
+	 * zfs_iter_dependents() by changelist_gather(); "data" is the
+	 * changelist being built.  A handle that is not added to the list is
+	 * closed here.  Returns -1 if the node cannot be allocated, the result
+	 * of the recursive child walk when one is made, and 0 otherwise.
+	 *
+	 * We only want to unmount/unshare those filesystems that may inherit
+	 * from the target filesystem.  If we find any filesystem with a
+	 * locally set mountpoint, we ignore any children since changing the
+	 * property will not affect them.  If this is a rename, we iterate
+	 * over all children regardless, since we need them unmounted in
+	 * order to do the rename.  Also, if this is a volume and we're doing
+	 * a rename, then always add it to the changelist.
+	 */
+
+	if (!(ZFS_IS_VOLUME(zhp) && clp->cl_realprop == ZFS_PROP_NAME) &&
+	    zfs_prop_get(zhp, clp->cl_prop, property,
+	    sizeof (property), &sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/*
+	 * If we are "watching" sharenfs or sharesmb
+	 * then check out the companion property which is tracked
+	 * in cl_shareprop
+	 */
+	if (clp->cl_shareprop != ZPROP_INVAL &&
+	    zfs_prop_get(zhp, clp->cl_shareprop, property,
+	    sizeof (property), &share_sourcetype, where, sizeof (where),
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (clp->cl_alldependents || clp->cl_allchildren ||
+	    sourcetype == ZPROP_SRC_DEFAULT ||
+	    sourcetype == ZPROP_SRC_INHERITED ||
+	    (clp->cl_shareprop != ZPROP_INVAL &&
+	    (share_sourcetype == ZPROP_SRC_DEFAULT ||
+	    share_sourcetype == ZPROP_SRC_INHERITED))) {
+		if ((cn = zfs_alloc(zfs_get_handle(zhp),
+		    sizeof (prop_changenode_t))) == NULL) {
+			zfs_close(zhp);
+			return (-1);
+		}
+
+		cn->cn_handle = zhp;
+		cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+		    zfs_is_mounted(zhp, NULL);
+		cn->cn_shared = zfs_is_shared(zhp);
+		cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+		cn->cn_needpost = B_TRUE;
+
+		/* Indicate if any child is exported to a local zone. */
+		if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
+			clp->cl_haszonedchild = B_TRUE;
+
+		uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+
+		if (clp->cl_sorted) {
+			uu_list_index_t idx;
+
+			(void) uu_list_find(clp->cl_list, cn, NULL,
+			    &idx);
+			uu_list_insert(clp->cl_list, cn, idx);
+		} else {
+			/*
+			 * Add this child to beginning of the list. Children
+			 * below this one in the hierarchy will get added above
+			 * this one in the list. This produces a list in
+			 * reverse dataset name order.
+			 * This is necessary when the original mountpoint
+			 * is legacy or none.
+			 */
+			verify(uu_list_insert_before(clp->cl_list,
+			    uu_list_first(clp->cl_list), cn) == 0);
+		}
+
+		/* a dependents walk already visits every descendant */
+		if (!clp->cl_alldependents)
+			return (zfs_iter_children(zhp, change_one, data));
+	} else {
+		zfs_close(zhp);
+	}
+
+	return (0);
+}
+
+/*ARGSUSED*/
+static int
+compare_mountpoints(const void *a, const void *b, void *unused)
+{
+	const prop_changenode_t *ca = a;
+	const prop_changenode_t *cb = b;
+
+	char mounta[MAXPATHLEN];
+	char mountb[MAXPATHLEN];
+
+	boolean_t hasmounta, hasmountb;
+
+	/*
+	 * When unsharing or unmounting filesystems, we need to do it in
+	 * mountpoint order.  This allows the user to have a mountpoint
+	 * hierarchy that is different from the dataset hierarchy, and still
+	 * allow it to be changed.  However, if either dataset doesn't have a
+	 * mountpoint (because it is a volume or a snapshot), we place it at the
+	 * end of the list, because it doesn't affect our change at all.
+	 *
+	 * Two datasets that both have mountpoints compare as
+	 * strcmp(mountb, mounta), i.e. in reverse lexical order.
+	 *
+	 * NOTE(review): a node without a mountpoint compares as -1 against
+	 * one that has a mountpoint, which would normally sort it first, not
+	 * last -- confirm against the ordering uu_list expects.
+	 */
+	hasmounta = (zfs_prop_get(ca->cn_handle, ZFS_PROP_MOUNTPOINT, mounta,
+	    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
+	hasmountb = (zfs_prop_get(cb->cn_handle, ZFS_PROP_MOUNTPOINT, mountb,
+	    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+
+	if (!hasmounta && hasmountb)
+		return (-1);
+	else if (hasmounta && !hasmountb)
+		return (1);
+	else if (!hasmounta && !hasmountb)
+		return (0);
+	else
+		return (strcmp(mountb, mounta));
+}
+
+/*
+ * Given a ZFS handle and a property, construct a complete list of datasets
+ * that need to be modified as part of this process.  For anything but the
+ * 'mountpoint' and 'sharenfs' properties, this just returns an empty list.
+ * Otherwise, we iterate over all children and look for any datasets that
+ * inherit the property.  For each such dataset, we add it to the list and
+ * mark whether it was shared beforehand.
+ */
+prop_changelist_t *
+changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
+    int mnt_flags)
+{
+	prop_changelist_t *clp;
+	prop_changenode_t *cn;
+	zfs_handle_t *temp;
+	char property[ZFS_MAXPROPLEN];
+	uu_compare_fn_t *compare = NULL;
+	boolean_t legacy = B_FALSE;
+
+	if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
+		return (NULL);
+
+	/*
+	 * For mountpoint-related tasks, we want to sort everything by
+	 * mountpoint, so that we mount and unmount them in the appropriate
+	 * order, regardless of their position in the hierarchy.
+	 *
+	 * A 'legacy' or 'none' mountpoint on the target disables sorting; the
+	 * list is then kept in insertion order instead.
+	 */
+	if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
+	    prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS ||
+	    prop == ZFS_PROP_SHARESMB) {
+
+		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
+		    property, sizeof (property),
+		    NULL, NULL, 0, B_FALSE) == 0 &&
+		    (strcmp(property, "legacy") == 0 ||
+		    strcmp(property, "none") == 0)) {
+
+			legacy = B_TRUE;
+		}
+		if (!legacy) {
+			compare = compare_mountpoints;
+			clp->cl_sorted = B_TRUE;
+		}
+	}
+
+	clp->cl_pool = uu_list_pool_create("changelist_pool",
+	    sizeof (prop_changenode_t),
+	    offsetof(prop_changenode_t, cn_listnode),
+	    compare, 0);
+	if (clp->cl_pool == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	clp->cl_list = uu_list_create(clp->cl_pool, NULL,
+	    clp->cl_sorted ? UU_LIST_SORTED : 0);
+	clp->cl_gflags = gather_flags;
+	clp->cl_mflags = mnt_flags;
+
+	if (clp->cl_list == NULL) {
+		assert(uu_error() == UU_ERROR_NO_MEMORY);
+		(void) zfs_error(zhp->zfs_hdl, EZFS_NOMEM, "internal error");
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * If this is a rename or the 'zoned' property, we pretend we're
+	 * changing the mountpoint and flag it so we can catch all children in
+	 * change_one().
+	 *
+	 * Flag cl_alldependents to catch all children plus the dependents
+	 * (clones) that are not in the hierarchy.
+	 *
+	 * 'canmount' and 'volsize' are likewise treated as a mountpoint
+	 * change, but without either flag.  cl_realprop keeps the property
+	 * the caller actually asked for.
+	 */
+	if (prop == ZFS_PROP_NAME) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_alldependents = B_TRUE;
+	} else if (prop == ZFS_PROP_ZONED) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+		clp->cl_allchildren = B_TRUE;
+	} else if (prop == ZFS_PROP_CANMOUNT) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else if (prop == ZFS_PROP_VOLSIZE) {
+		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
+	} else {
+		clp->cl_prop = prop;
+	}
+	clp->cl_realprop = prop;
+
+	if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
+	    clp->cl_prop != ZFS_PROP_SHARENFS &&
+	    clp->cl_prop != ZFS_PROP_SHARESMB)
+		return (clp);	/* nothing gathered: the list stays empty */
+
+	/*
+	 * If watching SHARENFS or SHARESMB then
+	 * also watch its companion property.
+	 */
+	if (clp->cl_prop == ZFS_PROP_SHARENFS)
+		clp->cl_shareprop = ZFS_PROP_SHARESMB;
+	else if (clp->cl_prop == ZFS_PROP_SHARESMB)
+		clp->cl_shareprop = ZFS_PROP_SHARENFS;
+
+	if (clp->cl_alldependents) {
+		if (zfs_iter_dependents(zhp, B_TRUE, change_one, clp) != 0) {
+			changelist_free(clp);
+			return (NULL);
+		}
+	} else if (zfs_iter_children(zhp, change_one, clp) != 0) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * We have to re-open ourselves because we auto-close all the handles
+	 * and can't tell the difference.
+	 */
+	if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
+	    ZFS_TYPE_DATASET)) == NULL) {
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	/*
+	 * Always add ourself to the list.  We add ourselves to the end so that
+	 * we're the last to be unmounted.
+	 */
+	if ((cn = zfs_alloc(zhp->zfs_hdl,
+	    sizeof (prop_changenode_t))) == NULL) {
+		zfs_close(temp);
+		changelist_free(clp);
+		return (NULL);
+	}
+
+	cn->cn_handle = temp;
+	cn->cn_mounted = (clp->cl_gflags & CL_GATHER_MOUNT_ALWAYS) ||
+	    zfs_is_mounted(temp, NULL);
+	cn->cn_shared = zfs_is_shared(temp);
+	cn->cn_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+	cn->cn_needpost = B_TRUE;
+
+	uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
+	if (clp->cl_sorted) {
+		uu_list_index_t idx;
+		(void) uu_list_find(clp->cl_list, cn, NULL, &idx);
+		uu_list_insert(clp->cl_list, cn, idx);
+	} else {
+		/*
+		 * Add the target dataset to the end of the list.
+		 * The list is not really unsorted. The list will be
+		 * in reverse dataset name order. This is necessary
+		 * when the original mountpoint is legacy or none.
+		 */
+		verify(uu_list_insert_after(clp->cl_list,
+		    uu_list_last(clp->cl_list), cn) == 0);
+	}
+
+	/*
+	 * If the mountpoint property was previously 'legacy', or 'none',
+	 * record it as the behavior of changelist_postfix() will be different.
+	 */
+	if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) && legacy) {
+		/*
+		 * do not automatically mount ex-legacy datasets if
+		 * we specifically set canmount to noauto
+		 */
+		if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) !=
+		    ZFS_CANMOUNT_NOAUTO)
+			clp->cl_waslegacy = B_TRUE;
+	}
+
+	return (clp);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c
new file mode 100644
index 0000000..a3f6129
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c
@@ -0,0 +1,112 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+
+#include "libzfs_compat.h"
+
+int zfs_ioctl_version = ZFS_IOCVER_UNDEF;
+static int zfs_spa_version = -1;
+
+/*
+ * Read sysctl vfs.zfs.version.ioctl; "ver" starts out as ZFS_IOCVER_NONE.
+ */
+int
+get_zfs_ioctl_version(void)
+{
+	size_t ver_size;
+	int ver = ZFS_IOCVER_NONE;
+
+	ver_size = sizeof(ver);
+	sysctlbyname("vfs.zfs.version.ioctl", &ver, &ver_size, NULL, 0);
+
+	return (ver);
+}
+
+/*
+ * Read sysctl vfs.zfs.version.spa (the SPA version); "ver" starts out as 0.
+ */
+static int
+get_zfs_spa_version(void)
+{
+	size_t ver_size;
+	int ver = 0;
+
+	ver_size = sizeof(ver);
+	sysctlbyname("vfs.zfs.version.spa", &ver, &ver_size, NULL, 0);
+
+	return (ver);
+}
+
+/*
+ * This is FreeBSD version of ioctl, because Solaris' ioctl() updates
+ * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
+ * error is returned zc_nvlist_dst_size won't be updated.
+ */
+int
+zcmd_ioctl(int fd, int request, zfs_cmd_t *zc)
+{
+	size_t oldsize;
+	int ret, cflag = ZFS_CMD_COMPAT_NONE;
+
+	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
+		zfs_ioctl_version = get_zfs_ioctl_version();
+
+	if (zfs_ioctl_version >= ZFS_IOCVER_DEADMAN) {
+		switch (zfs_ioctl_version) {
+		case ZFS_IOCVER_ZCMD:
+			cflag = ZFS_CMD_COMPAT_ZCMD;
+			break;
+		case ZFS_IOCVER_LZC:
+			cflag = ZFS_CMD_COMPAT_LZC;
+			break;
+		case ZFS_IOCVER_DEADMAN:
+			cflag = ZFS_CMD_COMPAT_DEADMAN;
+			break;
+		}	/* any other version keeps ZFS_CMD_COMPAT_NONE */
+	} else {
+		/*
+		 * If vfs.zfs.version.ioctl is not defined, assume we have v28
+		 * compatible binaries and use vfs.zfs.version.spa to test for v15
+		 */
+		cflag = ZFS_CMD_COMPAT_V28;
+
+		if (zfs_spa_version < 0)
+			zfs_spa_version = get_zfs_spa_version();
+
+		if (zfs_spa_version == SPA_VERSION_15 ||
+		    zfs_spa_version == SPA_VERSION_14 ||
+		    zfs_spa_version == SPA_VERSION_13)
+			cflag = ZFS_CMD_COMPAT_V15;
+	}
+
+	oldsize = zc->zc_nvlist_dst_size;
+	ret = zcmd_ioctl_compat(fd, request, zc, cflag);
+
+	if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
+		ret = -1;	/* dst size grew past what was passed in */
+		errno = ENOMEM;
+	}
+
+	return (ret);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h
new file mode 100644
index 0000000..3761668
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+
+#ifndef	_LIBZFS_COMPAT_H
+#define	_LIBZFS_COMPAT_H
+
+#include <zfs_ioctl_compat.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+int get_zfs_ioctl_version(void);
+int zcmd_ioctl(int fd, int request, zfs_cmd_t *zc);
+
+#define	ioctl(fd, ioc, zc)	zcmd_ioctl((fd), (ioc), (zc))	/* reroute ioctl() */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBZFS_COMPAT_H */
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
new file mode 100644
index 0000000..d5ba20f
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */ + +/* + * The pool configuration repository is stored in /etc/zfs/zpool.cache as a + * single packed nvlist. While it would be nice to just read in this + * file from userland, this wouldn't work from a local zone. So we have to have + * a zpool ioctl to return the complete configuration for all pools. In the + * global zone, this will be identical to reading the file and unpacking it in + * userland. + */ + +#include <errno.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stddef.h> +#include <string.h> +#include <unistd.h> +#include <libintl.h> +#include <libuutil.h> + +#include "libzfs_impl.h" + +typedef struct config_node { + char *cn_name; + nvlist_t *cn_config; + uu_avl_node_t cn_avl; +} config_node_t; + +/* ARGSUSED */ +static int +config_node_compare(const void *a, const void *b, void *unused) +{ + int ret; + + const config_node_t *ca = (config_node_t *)a; + const config_node_t *cb = (config_node_t *)b; + + ret = strcmp(ca->cn_name, cb->cn_name); + + if (ret < 0) + return (-1); + else if (ret > 0) + return (1); + else + return (0); +} + +void +namespace_clear(libzfs_handle_t *hdl) +{ + if (hdl->libzfs_ns_avl) { + config_node_t *cn; + void *cookie = NULL; + + while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, + &cookie)) != NULL) { + nvlist_free(cn->cn_config); + free(cn->cn_name); + free(cn); + } + + uu_avl_destroy(hdl->libzfs_ns_avl); + hdl->libzfs_ns_avl = NULL; + } + + if (hdl->libzfs_ns_avlpool) { + uu_avl_pool_destroy(hdl->libzfs_ns_avlpool); + hdl->libzfs_ns_avlpool = NULL; + } +} + +/* + * Loads the pool namespace, or re-loads it if the cache has changed. + */ +static int +namespace_reload(libzfs_handle_t *hdl) +{ + nvlist_t *config; + config_node_t *cn; + nvpair_t *elem; + zfs_cmd_t zc = { 0 }; + void *cookie; + + if (hdl->libzfs_ns_gen == 0) { + /* + * This is the first time we've accessed the configuration + * cache. Initialize the AVL tree and then fall through to the + * common code. 
+ */ + if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool", + sizeof (config_node_t), + offsetof(config_node_t, cn_avl), + config_node_compare, UU_DEFAULT)) == NULL) + return (no_memory(hdl)); + + if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool, + NULL, UU_DEFAULT)) == NULL) + return (no_memory(hdl)); + } + + if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) + return (-1); + + for (;;) { + zc.zc_cookie = hdl->libzfs_ns_gen; + if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) { + switch (errno) { + case EEXIST: + /* + * The namespace hasn't changed. + */ + zcmd_free_nvlists(&zc); + return (0); + + case ENOMEM: + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + break; + + default: + zcmd_free_nvlists(&zc); + return (zfs_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, "failed to read " + "pool configuration"))); + } + } else { + hdl->libzfs_ns_gen = zc.zc_cookie; + break; + } + } + + if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + + zcmd_free_nvlists(&zc); + + /* + * Clear out any existing configuration information. 
+ */ + cookie = NULL; + while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) { + nvlist_free(cn->cn_config); + free(cn->cn_name); + free(cn); + } + + elem = NULL; + while ((elem = nvlist_next_nvpair(config, elem)) != NULL) { + nvlist_t *child; + uu_avl_index_t where; + + if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) { + nvlist_free(config); + return (-1); + } + + if ((cn->cn_name = zfs_strdup(hdl, + nvpair_name(elem))) == NULL) { + free(cn); + nvlist_free(config); + return (-1); + } + + verify(nvpair_value_nvlist(elem, &child) == 0); + if (nvlist_dup(child, &cn->cn_config, 0) != 0) { + free(cn->cn_name); + free(cn); + nvlist_free(config); + return (no_memory(hdl)); + } + verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where) + == NULL); + + uu_avl_insert(hdl->libzfs_ns_avl, cn, where); + } + + nvlist_free(config); + return (0); +} + +/* + * Retrieve the configuration for the given pool. The configuration is a nvlist + * describing the vdevs, as well as the statistics associated with each one. + */ +nvlist_t * +zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig) +{ + if (oldconfig) + *oldconfig = zhp->zpool_old_config; + return (zhp->zpool_config); +} + +/* + * Retrieves a list of enabled features and their refcounts and caches it in + * the pool handle. + */ +nvlist_t * +zpool_get_features(zpool_handle_t *zhp) +{ + nvlist_t *config, *features; + + config = zpool_get_config(zhp, NULL); + + if (config == NULL || !nvlist_exists(config, + ZPOOL_CONFIG_FEATURE_STATS)) { + int error; + boolean_t missing = B_FALSE; + + error = zpool_refresh_stats(zhp, &missing); + + if (error != 0 || missing) + return (NULL); + + config = zpool_get_config(zhp, NULL); + } + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, + &features) == 0); + + return (features); +} + +/* + * Refresh the vdev statistics associated with the given pool. 
This is used in + * iostat to show configuration changes and determine the delta from the last + * time the function was called. This function can fail, in case the pool has + * been destroyed. + */ +int +zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing) +{ + zfs_cmd_t zc = { 0 }; + int error; + nvlist_t *config; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + *missing = B_FALSE; + (void) strcpy(zc.zc_name, zhp->zpool_name); + + if (zhp->zpool_config_size == 0) + zhp->zpool_config_size = 1 << 16; + + if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0) + return (-1); + + for (;;) { + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS, + &zc) == 0) { + /* + * The real error is returned in the zc_cookie field. + */ + error = zc.zc_cookie; + break; + } + + if (errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + } else { + zcmd_free_nvlists(&zc); + if (errno == ENOENT || errno == EINVAL) + *missing = B_TRUE; + zhp->zpool_state = POOL_STATE_UNAVAIL; + return (0); + } + } + + if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + + zcmd_free_nvlists(&zc); + + zhp->zpool_config_size = zc.zc_nvlist_dst_size; + + if (zhp->zpool_config != NULL) { + uint64_t oldtxg, newtxg; + + verify(nvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0); + verify(nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0); + + if (zhp->zpool_old_config != NULL) + nvlist_free(zhp->zpool_old_config); + + if (oldtxg != newtxg) { + nvlist_free(zhp->zpool_config); + zhp->zpool_old_config = NULL; + } else { + zhp->zpool_old_config = zhp->zpool_config; + } + } + + zhp->zpool_config = config; + if (error) + zhp->zpool_state = POOL_STATE_UNAVAIL; + else + zhp->zpool_state = POOL_STATE_ACTIVE; + + return (0); +} + +/* + * If the __ZFS_POOL_RESTRICT environment variable is set we only iterate over + * pools it lists. 
+ * + * This is an undocumented feature for use during testing only. + * + * This function returns B_TRUE if the pool should be skipped + * during iteration. + */ +static boolean_t +check_restricted(const char *poolname) +{ + static boolean_t initialized = B_FALSE; + static char *restricted = NULL; + + const char *cur, *end; + int len, namelen; + + if (!initialized) { + initialized = B_TRUE; + restricted = getenv("__ZFS_POOL_RESTRICT"); + } + + if (NULL == restricted) + return (B_FALSE); + + cur = restricted; + namelen = strlen(poolname); + do { + end = strchr(cur, ' '); + len = (NULL == end) ? strlen(cur) : (end - cur); + + if (len == namelen && 0 == strncmp(cur, poolname, len)) { + return (B_FALSE); + } + + cur += (len + 1); + } while (NULL != end); + + return (B_TRUE); +} + +/* + * Iterate over all pools in the system. + */ +int +zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data) +{ + config_node_t *cn; + zpool_handle_t *zhp; + int ret; + + /* + * If someone makes a recursive call to zpool_iter(), we want to avoid + * refreshing the namespace because that will invalidate the parent + * context. We allow recursive calls, but simply re-use the same + * namespace AVL tree. + */ + if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0) + return (-1); + + hdl->libzfs_pool_iter++; + for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; + cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { + + if (check_restricted(cn->cn_name)) + continue; + + if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) { + hdl->libzfs_pool_iter--; + return (-1); + } + + if (zhp == NULL) + continue; + + if ((ret = func(zhp, data)) != 0) { + hdl->libzfs_pool_iter--; + return (ret); + } + } + hdl->libzfs_pool_iter--; + + return (0); +} + +/* + * Iterate over root datasets, calling the given function for each. The zfs + * handle passed each time must be explicitly closed by the callback. 
+ */
+int
+zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
+{
+	config_node_t *cn;
+	zfs_handle_t *zhp;
+	int ret;
+
+	if (namespace_reload(hdl) != 0)
+		return (-1);
+
+	for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+	    cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
+
+		if (check_restricted(cn->cn_name))
+			continue;
+
+		if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL)
+			continue;	/* no handle for this pool: skip it */
+
+		if ((ret = func(zhp, data)) != 0)
+			return (ret);	/* first non-zero result ends the walk */
+	}
+
+	return (0);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
new file mode 100644
index 0000000..8df7471
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
@@ -0,0 +1,4788 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012 DEY Storage Systems, Inc.  All rights reserved.
+ * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
+ * All rights reserved.
+ * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <zone.h>
+#include <fcntl.h>
+#include <sys/mntent.h>
+#include <sys/mount.h>
+#include <priv.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stddef.h>
+#include <idmap.h>
+
+#include <sys/dnode.h>
+#include <sys/spa.h>
+#include <sys/zap.h>
+#include <sys/misc.h>
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+#include "zfs_deleg.h"
+
+static int userquota_propname_decode(const char *propname, boolean_t zoned,
+    zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
+
+/*
+ * Given a single type (not a mask of types), return the type in a human
+ * readable form.
+ *
+ * Returns NULL for any type other than filesystem, snapshot or volume.
+ */
+const char *
+zfs_type_to_name(zfs_type_t type)
+{
+	switch (type) {
+	case ZFS_TYPE_FILESYSTEM:
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+	case ZFS_TYPE_SNAPSHOT:
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+	case ZFS_TYPE_VOLUME:
+		return (dgettext(TEXT_DOMAIN, "volume"));
+	}
+
+	return (NULL);
+}
+
+/*
+ * Given a path and mask of ZFS types, return a string describing this dataset.
+ * This is used when we fail to open a dataset and we cannot get an exact type.
+ * We guess what the type would have been based on the path and the mask of
+ * acceptable types.
+ */
+static const char *
+path_to_str(const char *path, int types)
+{
+	/*
+	 * When given a single type, always report the exact type.
+	 */
+	if (types == ZFS_TYPE_SNAPSHOT)
+		return (dgettext(TEXT_DOMAIN, "snapshot"));
+	if (types == ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+	if (types == ZFS_TYPE_VOLUME)
+		return (dgettext(TEXT_DOMAIN, "volume"));
+
+	/*
+	 * The user is requesting more than one type of dataset.  If this is the
+	 * case, consult the path itself.  If we're looking for a snapshot, and
+	 * a '@' is found, then report it as "snapshot".  Otherwise, remove the
+	 * snapshot attribute and try again.
+	 */
+	if (types & ZFS_TYPE_SNAPSHOT) {
+		if (strchr(path, '@') != NULL)
+			return (dgettext(TEXT_DOMAIN, "snapshot"));
+		return (path_to_str(path, types & ~ZFS_TYPE_SNAPSHOT));
+	}
+
+	/*
+	 * The user has requested either filesystems or volumes.
+	 * We have no way of knowing a priori what type this would be, so always
+	 * report it as "filesystem" or "volume", our two primitive types.
+	 */
+	if (types & ZFS_TYPE_FILESYSTEM)
+		return (dgettext(TEXT_DOMAIN, "filesystem"));
+
+	assert(types & ZFS_TYPE_VOLUME);
+	return (dgettext(TEXT_DOMAIN, "volume"));
+}
+
+/*
+ * Validate a ZFS path.  This is used even before trying to open the dataset, to
+ * provide a more meaningful error message.  We call zfs_error_aux() to
+ * explain exactly why the name was not valid.
+ *
+ * Note the return convention: 0 means the name is NOT valid and -1 (any
+ * non-zero value) means it is valid.  "hdl" may be NULL, in which case no
+ * explanation is recorded.
+ */
+int
+zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
+    boolean_t modifying)
+{
+	namecheck_err_t why;
+	char what;
+
+	(void) zfs_prop_get_table();
+	if (dataset_namecheck(path, &why, &what) != 0) {
+		if (hdl != NULL) {
+			switch (why) {
+			case NAME_ERR_TOOLONG:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is too long"));
+				break;
+
+			case NAME_ERR_LEADING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "leading slash in name"));
+				break;
+
+			case NAME_ERR_EMPTY_COMPONENT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "empty component in name"));
+				break;
+
+			case NAME_ERR_TRAILING_SLASH:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "trailing slash in name"));
+				break;
+
+			case NAME_ERR_INVALCHAR:
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "invalid character "
+				    "'%c' in name"), what);
+				break;
+
+			case NAME_ERR_MULTIPLE_AT:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "multiple '@' delimiters in name"));
+				break;
+
+			case NAME_ERR_NOLETTER:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool doesn't begin with a letter"));
+				break;
+
+			case NAME_ERR_RESERVED:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is reserved"));
+				break;
+
+			case NAME_ERR_DISKLIKE:
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "reserved disk name"));
+				break;
+			}
+		}
+
+		return (0);
+	}
+
+	if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot delimiter '@' in filesystem name"));
+		return (0);
+	}
+
+	if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "missing '@' delimiter in snapshot name"));
+		return (0);
+	}
+
+	if (modifying && strchr(path, '%') != NULL) {
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid character %c in name"), '%');
+		return (0);
+	}
+
+	return (-1);
+}
+
+int
+zfs_name_valid(const char *name, zfs_type_t type)
+{
+	if (type == ZFS_TYPE_POOL)
+
return (zpool_name_valid(NULL, B_FALSE, name)); + return (zfs_validate_name(NULL, name, type, B_FALSE)); +} + +/* + * This function takes the raw DSL properties, and filters out the user-defined + * properties into a separate nvlist. + */ +static nvlist_t * +process_user_props(zfs_handle_t *zhp, nvlist_t *props) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + nvpair_t *elem; + nvlist_t *propval; + nvlist_t *nvl; + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { + (void) no_memory(hdl); + return (NULL); + } + + elem = NULL; + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + if (!zfs_prop_user(nvpair_name(elem))) + continue; + + verify(nvpair_value_nvlist(elem, &propval) == 0); + if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) { + nvlist_free(nvl); + (void) no_memory(hdl); + return (NULL); + } + } + + return (nvl); +} + +static zpool_handle_t * +zpool_add_handle(zfs_handle_t *zhp, const char *pool_name) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zpool_handle_t *zph; + + if ((zph = zpool_open_canfail(hdl, pool_name)) != NULL) { + if (hdl->libzfs_pool_handles != NULL) + zph->zpool_next = hdl->libzfs_pool_handles; + hdl->libzfs_pool_handles = zph; + } + return (zph); +} + +static zpool_handle_t * +zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zpool_handle_t *zph = hdl->libzfs_pool_handles; + + while ((zph != NULL) && + (strncmp(pool_name, zpool_get_name(zph), len) != 0)) + zph = zph->zpool_next; + return (zph); +} + +/* + * Returns a handle to the pool that contains the provided dataset. + * If a handle to that pool already exists then that handle is returned. + * Otherwise, a new handle is created and added to the list of handles. 
+ */ +static zpool_handle_t * +zpool_handle(zfs_handle_t *zhp) +{ + char *pool_name; + int len; + zpool_handle_t *zph; + + len = strcspn(zhp->zfs_name, "/@#") + 1; + pool_name = zfs_alloc(zhp->zfs_hdl, len); + (void) strlcpy(pool_name, zhp->zfs_name, len); + + zph = zpool_find_handle(zhp, pool_name, len); + if (zph == NULL) + zph = zpool_add_handle(zhp, pool_name); + + free(pool_name); + return (zph); +} + +void +zpool_free_handles(libzfs_handle_t *hdl) +{ + zpool_handle_t *next, *zph = hdl->libzfs_pool_handles; + + while (zph != NULL) { + next = zph->zpool_next; + zpool_close(zph); + zph = next; + } + hdl->libzfs_pool_handles = NULL; +} + +/* + * Utility function to gather stats (objset and zpl) for the given object. + */ +static int +get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + + (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); + + while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) { + if (errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, zc) != 0) { + return (-1); + } + } else { + return (-1); + } + } + return (0); +} + +/* + * Utility function to get the received properties of the given object. 
+ */ +static int +get_recvd_props_ioctl(zfs_handle_t *zhp) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + nvlist_t *recvdprops; + zfs_cmd_t zc = { 0 }; + int err; + + if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) + return (-1); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { + if (errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + return (-1); + } + } else { + zcmd_free_nvlists(&zc); + return (-1); + } + } + + err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); + zcmd_free_nvlists(&zc); + if (err != 0) + return (-1); + + nvlist_free(zhp->zfs_recvd_props); + zhp->zfs_recvd_props = recvdprops; + + return (0); +} + +static int +put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) +{ + nvlist_t *allprops, *userprops; + + zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */ + + if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) { + return (-1); + } + + /* + * XXX Why do we store the user props separately, in addition to + * storing them in zfs_props? + */ + if ((userprops = process_user_props(zhp, allprops)) == NULL) { + nvlist_free(allprops); + return (-1); + } + + nvlist_free(zhp->zfs_props); + nvlist_free(zhp->zfs_user_props); + + zhp->zfs_props = allprops; + zhp->zfs_user_props = userprops; + + return (0); +} + +static int +get_stats(zfs_handle_t *zhp) +{ + int rc = 0; + zfs_cmd_t zc = { 0 }; + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + if (get_stats_ioctl(zhp, &zc) != 0) + rc = -1; + else if (put_stats_zhdl(zhp, &zc) != 0) + rc = -1; + zcmd_free_nvlists(&zc); + return (rc); +} + +/* + * Refresh the properties currently stored in the handle. + */ +void +zfs_refresh_properties(zfs_handle_t *zhp) +{ + (void) get_stats(zhp); +} + +/* + * Makes a handle from the given dataset name. Used by zfs_open() and + * zfs_iter_* to create child handles on the fly. 
+ */ +static int +make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) +{ + if (put_stats_zhdl(zhp, zc) != 0) + return (-1); + + /* + * We've managed to open the dataset and gather statistics. Determine + * the high-level type. + */ + if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) + zhp->zfs_head_type = ZFS_TYPE_VOLUME; + else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) + zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; + else + abort(); + + if (zhp->zfs_dmustats.dds_is_snapshot) + zhp->zfs_type = ZFS_TYPE_SNAPSHOT; + else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) + zhp->zfs_type = ZFS_TYPE_VOLUME; + else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) + zhp->zfs_type = ZFS_TYPE_FILESYSTEM; + else + abort(); /* we should never see any other types */ + + if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) + return (-1); + + return (0); +} + +zfs_handle_t * +make_dataset_handle(libzfs_handle_t *hdl, const char *path) +{ + zfs_cmd_t zc = { 0 }; + + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + zhp->zfs_hdl = hdl; + (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); + if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) { + free(zhp); + return (NULL); + } + if (get_stats_ioctl(zhp, &zc) == -1) { + zcmd_free_nvlists(&zc); + free(zhp); + return (NULL); + } + if (make_dataset_handle_common(zhp, &zc) == -1) { + free(zhp); + zhp = NULL; + } + zcmd_free_nvlists(&zc); + return (zhp); +} + +zfs_handle_t * +make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) +{ + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + zhp->zfs_hdl = hdl; + (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); + if (make_dataset_handle_common(zhp, zc) == -1) { + free(zhp); + return (NULL); + } + return (zhp); +} + +zfs_handle_t * +make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) +{ + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + 
return (NULL); + + zhp->zfs_hdl = pzhp->zfs_hdl; + (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); + zhp->zfs_head_type = pzhp->zfs_type; + zhp->zfs_type = ZFS_TYPE_SNAPSHOT; + zhp->zpool_hdl = zpool_handle(zhp); + return (zhp); +} + +zfs_handle_t * +zfs_handle_dup(zfs_handle_t *zhp_orig) +{ + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + zhp->zfs_hdl = zhp_orig->zfs_hdl; + zhp->zpool_hdl = zhp_orig->zpool_hdl; + (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, + sizeof (zhp->zfs_name)); + zhp->zfs_type = zhp_orig->zfs_type; + zhp->zfs_head_type = zhp_orig->zfs_head_type; + zhp->zfs_dmustats = zhp_orig->zfs_dmustats; + if (zhp_orig->zfs_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + if (zhp_orig->zfs_user_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_user_props, + &zhp->zfs_user_props, 0) != 0) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + if (zhp_orig->zfs_recvd_props != NULL) { + if (nvlist_dup(zhp_orig->zfs_recvd_props, + &zhp->zfs_recvd_props, 0)) { + (void) no_memory(zhp->zfs_hdl); + zfs_close(zhp); + return (NULL); + } + } + zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; + if (zhp_orig->zfs_mntopts != NULL) { + zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, + zhp_orig->zfs_mntopts); + } + zhp->zfs_props_table = zhp_orig->zfs_props_table; + return (zhp); +} + +boolean_t +zfs_bookmark_exists(const char *path) +{ + nvlist_t *bmarks; + nvlist_t *props; + char fsname[ZFS_MAXNAMELEN]; + char *bmark_name; + char *pound; + int err; + boolean_t rv; + + + (void) strlcpy(fsname, path, sizeof (fsname)); + pound = strchr(fsname, '#'); + if (pound == NULL) + return (B_FALSE); + + *pound = '\0'; + bmark_name = pound + 1; + props = fnvlist_alloc(); + err = lzc_get_bookmarks(fsname, props, &bmarks); + nvlist_free(props); + if (err != 0) { + 
nvlist_free(bmarks); + return (B_FALSE); + } + + rv = nvlist_exists(bmarks, bmark_name); + nvlist_free(bmarks); + return (rv); +} + +zfs_handle_t * +make_bookmark_handle(zfs_handle_t *parent, const char *path, + nvlist_t *bmark_props) +{ + zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); + + if (zhp == NULL) + return (NULL); + + /* Fill in the name. */ + zhp->zfs_hdl = parent->zfs_hdl; + (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); + + /* Set the property lists. */ + if (nvlist_dup(bmark_props, &zhp->zfs_props, 0) != 0) { + free(zhp); + return (NULL); + } + + /* Set the types. */ + zhp->zfs_head_type = parent->zfs_head_type; + zhp->zfs_type = ZFS_TYPE_BOOKMARK; + + if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) { + nvlist_free(zhp->zfs_props); + free(zhp); + return (NULL); + } + + return (zhp); +} + +/* + * Opens the given snapshot, filesystem, or volume. The 'types' + * argument is a mask of acceptable types. The function will print an + * appropriate error message and return NULL if it can't be opened. + */ +zfs_handle_t * +zfs_open(libzfs_handle_t *hdl, const char *path, int types) +{ + zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); + + /* + * Validate the name before we even try to open it. + */ + if (!zfs_validate_name(hdl, path, ZFS_TYPE_DATASET, B_FALSE)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid dataset name")); + (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + return (NULL); + } + + /* + * Try to get stats for the dataset, which will tell us if it exists. 
+ */ + errno = 0; + if ((zhp = make_dataset_handle(hdl, path)) == NULL) { + (void) zfs_standard_error(hdl, errno, errbuf); + return (NULL); + } + + if (zhp == NULL) { + char *at = strchr(path, '@'); + + if (at != NULL) + *at = '\0'; + errno = 0; + if ((zhp = make_dataset_handle(hdl, path)) == NULL) { + (void) zfs_standard_error(hdl, errno, errbuf); + return (NULL); + } + if (at != NULL) + *at = '@'; + (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); + zhp->zfs_type = ZFS_TYPE_SNAPSHOT; + } + + if (!(types & zhp->zfs_type)) { + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + zfs_close(zhp); + return (NULL); + } + + return (zhp); +} + +/* + * Release a ZFS handle. Nothing to do but free the associated memory. + */ +void +zfs_close(zfs_handle_t *zhp) +{ + if (zhp->zfs_mntopts) + free(zhp->zfs_mntopts); + nvlist_free(zhp->zfs_props); + nvlist_free(zhp->zfs_user_props); + nvlist_free(zhp->zfs_recvd_props); + free(zhp); +} + +typedef struct mnttab_node { + struct mnttab mtn_mt; + avl_node_t mtn_node; +} mnttab_node_t; + +static int +libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) +{ + const mnttab_node_t *mtn1 = arg1; + const mnttab_node_t *mtn2 = arg2; + int rv; + + rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special); + + if (rv == 0) + return (0); + return (rv > 0 ? 
1 : -1); +} + +void +libzfs_mnttab_init(libzfs_handle_t *hdl) +{ + assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); + avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, + sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); +} + +void +libzfs_mnttab_update(libzfs_handle_t *hdl) +{ + struct mnttab entry; + + rewind(hdl->libzfs_mnttab); + while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { + mnttab_node_t *mtn; + + if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) + continue; + mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); + mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special); + mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp); + mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, entry.mnt_fstype); + mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); + avl_add(&hdl->libzfs_mnttab_cache, mtn); + } +} + +void +libzfs_mnttab_fini(libzfs_handle_t *hdl) +{ + void *cookie = NULL; + mnttab_node_t *mtn; + + while (mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie)) { + free(mtn->mtn_mt.mnt_special); + free(mtn->mtn_mt.mnt_mountp); + free(mtn->mtn_mt.mnt_fstype); + free(mtn->mtn_mt.mnt_mntopts); + free(mtn); + } + avl_destroy(&hdl->libzfs_mnttab_cache); +} + +void +libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) +{ + hdl->libzfs_mnttab_enable = enable; +} + +int +libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, + struct mnttab *entry) +{ + mnttab_node_t find; + mnttab_node_t *mtn; + + if (!hdl->libzfs_mnttab_enable) { + struct mnttab srch = { 0 }; + + if (avl_numnodes(&hdl->libzfs_mnttab_cache)) + libzfs_mnttab_fini(hdl); + rewind(hdl->libzfs_mnttab); + srch.mnt_special = (char *)fsname; + srch.mnt_fstype = MNTTYPE_ZFS; + if (getmntany(hdl->libzfs_mnttab, entry, &srch) == 0) + return (0); + else + return (ENOENT); + } + + if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) + libzfs_mnttab_update(hdl); + + find.mtn_mt.mnt_special = (char *)fsname; + mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, 
NULL); + if (mtn) { + *entry = mtn->mtn_mt; + return (0); + } + return (ENOENT); +} + +void +libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, + const char *mountp, const char *mntopts) +{ + mnttab_node_t *mtn; + + if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) + return; + mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); + mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); + mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); + mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); + mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); + avl_add(&hdl->libzfs_mnttab_cache, mtn); +} + +void +libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) +{ + mnttab_node_t find; + mnttab_node_t *ret; + + find.mtn_mt.mnt_special = (char *)fsname; + if (ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) { + avl_remove(&hdl->libzfs_mnttab_cache, ret); + free(ret->mtn_mt.mnt_special); + free(ret->mtn_mt.mnt_mountp); + free(ret->mtn_mt.mnt_fstype); + free(ret->mtn_mt.mnt_mntopts); + free(ret); + } +} + +int +zfs_spa_version(zfs_handle_t *zhp, int *spa_version) +{ + zpool_handle_t *zpool_handle = zhp->zpool_hdl; + + if (zpool_handle == NULL) + return (-1); + + *spa_version = zpool_get_prop_int(zpool_handle, + ZPOOL_PROP_VERSION, NULL); + return (0); +} + +/* + * The choice of reservation property depends on the SPA version. + */ +static int +zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) +{ + int spa_version; + + if (zfs_spa_version(zhp, &spa_version) < 0) + return (-1); + + if (spa_version >= SPA_VERSION_REFRESERVATION) + *resv_prop = ZFS_PROP_REFRESERVATION; + else + *resv_prop = ZFS_PROP_RESERVATION; + + return (0); +} + +/* + * Given an nvlist of properties to set, validates that they are correct, and + * parses any numeric properties (index, boolean, etc) if they are specified as + * strings. 
+ */ +nvlist_t * +zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, + uint64_t zoned, zfs_handle_t *zhp, const char *errbuf) +{ + nvpair_t *elem; + uint64_t intval; + char *strval; + zfs_prop_t prop; + nvlist_t *ret; + int chosen_normal = -1; + int chosen_utf = -1; + + if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { + (void) no_memory(hdl); + return (NULL); + } + + /* + * Make sure this property is valid and applies to this type. + */ + + elem = NULL; + while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + const char *propname = nvpair_name(elem); + + prop = zfs_name_to_prop(propname); + if (prop == ZPROP_INVAL && zfs_prop_user(propname)) { + /* + * This is a user property: make sure it's a + * string, and that it's less than ZAP_MAXNAMELEN. + */ + if (nvpair_type(elem) != DATA_TYPE_STRING) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be a string"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property name '%s' is too long"), + propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + (void) nvpair_value_string(elem, &strval); + if (nvlist_add_string(ret, propname, strval) != 0) { + (void) no_memory(hdl); + goto error; + } + continue; + } + + /* + * Currently, only user properties can be modified on + * snapshots. 
+ */ + if (type == ZFS_TYPE_SNAPSHOT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "this property can not be modified for snapshots")); + (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); + goto error; + } + + if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) { + zfs_userquota_prop_t uqtype; + char newpropname[128]; + char domain[128]; + uint64_t rid; + uint64_t valary[3]; + + if (userquota_propname_decode(propname, zoned, + &uqtype, domain, sizeof (domain), &rid) != 0) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "'%s' has an invalid user/group name"), + propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (uqtype != ZFS_PROP_USERQUOTA && + uqtype != ZFS_PROP_GROUPQUOTA) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "'%s' is readonly"), + propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, + errbuf); + goto error; + } + + if (nvpair_type(elem) == DATA_TYPE_STRING) { + (void) nvpair_value_string(elem, &strval); + if (strcmp(strval, "none") == 0) { + intval = 0; + } else if (zfs_nicestrtonum(hdl, + strval, &intval) != 0) { + (void) zfs_error(hdl, + EZFS_BADPROP, errbuf); + goto error; + } + } else if (nvpair_type(elem) == + DATA_TYPE_UINT64) { + (void) nvpair_value_uint64(elem, &intval); + if (intval == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "use 'none' to disable " + "userquota/groupquota")); + goto error; + } + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be a number"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + /* + * Encode the prop name as + * userquota@<hex-rid>-domain, to make it easy + * for the kernel to decode. 
+ */ + (void) snprintf(newpropname, sizeof (newpropname), + "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], + (longlong_t)rid, domain); + valary[0] = uqtype; + valary[1] = rid; + valary[2] = intval; + if (nvlist_add_uint64_array(ret, newpropname, + valary, 3) != 0) { + (void) no_memory(hdl); + goto error; + } + continue; + } else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is readonly"), + propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); + goto error; + } + + if (prop == ZPROP_INVAL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid property '%s'"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (!zfs_prop_valid_for_type(prop, type)) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "'%s' does not " + "apply to datasets of this type"), propname); + (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); + goto error; + } + + if (zfs_prop_readonly(prop) && + (!zfs_prop_setonce(prop) || zhp != NULL)) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "'%s' is readonly"), + propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); + goto error; + } + + if (zprop_parse_value(hdl, elem, prop, type, ret, + &strval, &intval, errbuf) != 0) + goto error; + + /* + * Perform some additional checks for specific properties. 
+ */ + switch (prop) { + case ZFS_PROP_VERSION: + { + int version; + + if (zhp == NULL) + break; + version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); + if (intval < version) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Can not downgrade; already at version %u"), + version); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + } + + case ZFS_PROP_VOLBLOCKSIZE: + case ZFS_PROP_RECORDSIZE: + { + int maxbs = SPA_MAXBLOCKSIZE; + if (zhp != NULL) { + maxbs = zpool_get_prop_int(zhp->zpool_hdl, + ZPOOL_PROP_MAXBLOCKSIZE, NULL); + } + /* + * Volumes are limited to a volblocksize of 128KB, + * because they typically service workloads with + * small random writes, which incur a large performance + * penalty with large blocks. + */ + if (prop == ZFS_PROP_VOLBLOCKSIZE) + maxbs = SPA_OLD_MAXBLOCKSIZE; + /* + * The value must be a power of two between + * SPA_MINBLOCKSIZE and maxbs. + */ + if (intval < SPA_MINBLOCKSIZE || + intval > maxbs || !ISP2(intval)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be power of 2 from 512B " + "to %uKB"), propname, maxbs >> 10); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + } + case ZFS_PROP_MLSLABEL: + { +#ifdef sun + /* + * Verify the mlslabel string and convert to + * internal hex label string. + */ + + m_label_t *new_sl; + char *hex = NULL; /* internal label string */ + + /* Default value is already OK. */ + if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) + break; + + /* Verify the label can be converted to binary form */ + if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || + (str_to_label(strval, &new_sl, MAC_LABEL, + L_NO_CORRECTION, NULL) == -1)) { + goto badlabel; + } + + /* Now translate to hex internal label string */ + if (label_to_str(new_sl, &hex, M_INTERNAL, + DEF_NAMES) != 0) { + if (hex) + free(hex); + goto badlabel; + } + m_label_free(new_sl); + + /* If string is already in internal form, we're done. 
*/ + if (strcmp(strval, hex) == 0) { + free(hex); + break; + } + + /* Replace the label string with the internal form. */ + (void) nvlist_remove(ret, zfs_prop_to_name(prop), + DATA_TYPE_STRING); + verify(nvlist_add_string(ret, zfs_prop_to_name(prop), + hex) == 0); + free(hex); + + break; + +badlabel: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid mlslabel '%s'"), strval); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + m_label_free(new_sl); /* OK if null */ +#else /* !sun */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "mlslabel is not supported on FreeBSD")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); +#endif /* !sun */ + goto error; + + } + + case ZFS_PROP_MOUNTPOINT: + { + namecheck_err_t why; + + if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 || + strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0) + break; + + if (mountpoint_namecheck(strval, &why)) { + switch (why) { + case NAME_ERR_LEADING_SLASH: + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "'%s' must be an absolute path, " + "'none', or 'legacy'"), propname); + break; + case NAME_ERR_TOOLONG: + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "component of '%s' is too long"), + propname); + break; + } + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + } + + /*FALLTHRU*/ + + case ZFS_PROP_SHARESMB: + case ZFS_PROP_SHARENFS: + /* + * For the mountpoint and sharenfs or sharesmb + * properties, check if it can be set in a + * global/non-global zone based on + * the zoned property value: + * + * global zone non-global zone + * -------------------------------------------------- + * zoned=on mountpoint (no) mountpoint (yes) + * sharenfs (no) sharenfs (no) + * sharesmb (no) sharesmb (no) + * + * zoned=off mountpoint (yes) N/A + * sharenfs (yes) + * sharesmb (yes) + */ + if (zoned) { + if (getzoneid() == GLOBAL_ZONEID) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' cannot be set on " + "dataset in a non-global zone"), + propname); + (void) zfs_error(hdl, EZFS_ZONED, + errbuf); + goto error; + } 
else if (prop == ZFS_PROP_SHARENFS || + prop == ZFS_PROP_SHARESMB) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' cannot be set in " + "a non-global zone"), propname); + (void) zfs_error(hdl, EZFS_ZONED, + errbuf); + goto error; + } + } else if (getzoneid() != GLOBAL_ZONEID) { + /* + * If zoned property is 'off', this must be in + * a global zone. If not, something is wrong. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' cannot be set while dataset " + "'zoned' property is set"), propname); + (void) zfs_error(hdl, EZFS_ZONED, errbuf); + goto error; + } + + /* + * At this point, it is legitimate to set the + * property. Now we want to make sure that the + * property value is valid if it is sharenfs. + */ + if ((prop == ZFS_PROP_SHARENFS || + prop == ZFS_PROP_SHARESMB) && + strcmp(strval, "on") != 0 && + strcmp(strval, "off") != 0) { + zfs_share_proto_t proto; + + if (prop == ZFS_PROP_SHARESMB) + proto = PROTO_SMB; + else + proto = PROTO_NFS; + + /* + * Must be an valid sharing protocol + * option string so init the libshare + * in order to enable the parser and + * then parse the options. We use the + * control API since we don't care about + * the current configuration and don't + * want the overhead of loading it + * until we actually do something. + */ + + if (zfs_init_libshare(hdl, + SA_INIT_CONTROL_API) != SA_OK) { + /* + * An error occurred so we can't do + * anything + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' cannot be set: problem " + "in share initialization"), + propname); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + + if (zfs_parse_options(strval, proto) != SA_OK) { + /* + * There was an error in parsing so + * deal with it by issuing an error + * message and leaving after + * uninitializing the the libshare + * interface. 
+ */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' cannot be set to invalid " + "options"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + zfs_uninit_libshare(hdl); + goto error; + } + zfs_uninit_libshare(hdl); + } + + break; + case ZFS_PROP_UTF8ONLY: + chosen_utf = (int)intval; + break; + case ZFS_PROP_NORMALIZE: + chosen_normal = (int)intval; + break; + } + + /* + * For changes to existing volumes, we have some additional + * checks to enforce. + */ + if (type == ZFS_TYPE_VOLUME && zhp != NULL) { + uint64_t volsize = zfs_prop_get_int(zhp, + ZFS_PROP_VOLSIZE); + uint64_t blocksize = zfs_prop_get_int(zhp, + ZFS_PROP_VOLBLOCKSIZE); + char buf[64]; + + switch (prop) { + case ZFS_PROP_RESERVATION: + case ZFS_PROP_REFRESERVATION: + if (intval > volsize) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is greater than current " + "volume size"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + break; + + case ZFS_PROP_VOLSIZE: + if (intval % blocksize != 0) { + zfs_nicenum(blocksize, buf, + sizeof (buf)); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be a multiple of " + "volume block size (%s)"), + propname, buf); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + + if (intval == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' cannot be zero"), + propname); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + break; + } + } + } + + /* + * If normalization was chosen, but no UTF8 choice was made, + * enforce rejection of non-UTF8 names. + * + * If normalization was chosen, but rejecting non-UTF8 names + * was explicitly not chosen, it is an error. 
+ */ + if (chosen_normal > 0 && chosen_utf < 0) { + if (nvlist_add_uint64(ret, + zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) { + (void) no_memory(hdl); + goto error; + } + } else if (chosen_normal > 0 && chosen_utf == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be set 'on' if normalization chosen"), + zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + return (ret); + +error: + nvlist_free(ret); + return (NULL); +} + +int +zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) +{ + uint64_t old_volsize; + uint64_t new_volsize; + uint64_t old_reservation; + uint64_t new_reservation; + zfs_prop_t resv_prop; + + /* + * If this is an existing volume, and someone is setting the volsize, + * make sure that it matches the reservation, or add it if necessary. + */ + old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); + if (zfs_which_resv_prop(zhp, &resv_prop) < 0) + return (-1); + old_reservation = zfs_prop_get_int(zhp, resv_prop); + if ((zvol_volsize_to_reservation(old_volsize, zhp->zfs_props) != + old_reservation) || nvlist_lookup_uint64(nvl, + zfs_prop_to_name(resv_prop), &new_reservation) != ENOENT) { + return (0); + } + if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), + &new_volsize) != 0) + return (-1); + new_reservation = zvol_volsize_to_reservation(new_volsize, + zhp->zfs_props); + if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), + new_reservation) != 0) { + (void) no_memory(zhp->zfs_hdl); + return (-1); + } + return (1); +} + +void +zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, + char *errbuf) +{ + switch (err) { + + case ENOSPC: + /* + * For quotas and reservations, ENOSPC indicates + * something different; setting a quota or reservation + * doesn't use any disk space. 
+ */ + switch (prop) { + case ZFS_PROP_QUOTA: + case ZFS_PROP_REFQUOTA: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "size is less than current used or " + "reserved space")); + (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); + break; + + case ZFS_PROP_RESERVATION: + case ZFS_PROP_REFRESERVATION: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "size is greater than available space")); + (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); + break; + + default: + (void) zfs_standard_error(hdl, err, errbuf); + break; + } + break; + + case EBUSY: + (void) zfs_standard_error(hdl, EBUSY, errbuf); + break; + + case EROFS: + (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); + break; + + case E2BIG: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property value too long")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + break; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool and or dataset must be upgraded to set this " + "property or value")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + + case ERANGE: + case EDOM: + if (prop == ZFS_PROP_COMPRESSION || + prop == ZFS_PROP_RECORDSIZE) { + (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property setting is not allowed on " + "bootable datasets")); + (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + + case EINVAL: + if (prop == ZPROP_INVAL) { + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + + case EOVERFLOW: + /* + * This platform can't address a volume this big. + */ +#ifdef _ILP32 + if (prop == ZFS_PROP_VOLSIZE) { + (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); + break; + } +#endif + /* FALLTHROUGH */ + default: + (void) zfs_standard_error(hdl, err, errbuf); + } +} + +/* + * Given a property name and value, set the property for the given dataset. 
+ */ +int +zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) +{ + zfs_cmd_t zc = { 0 }; + int ret = -1; + prop_changelist_t *cl = NULL; + char errbuf[1024]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + nvlist_t *nvl = NULL, *realprops; + zfs_prop_t prop; + boolean_t do_prefix = B_TRUE; + int added_resv; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), + zhp->zfs_name); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 || + nvlist_add_string(nvl, propname, propval) != 0) { + (void) no_memory(hdl); + goto error; + } + + if ((realprops = zfs_valid_proplist(hdl, zhp->zfs_type, nvl, + zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL) + goto error; + + nvlist_free(nvl); + nvl = realprops; + + prop = zfs_name_to_prop(propname); + + /* We don't support those properties on FreeBSD. */ + switch (prop) { + case ZFS_PROP_DEVICES: + case ZFS_PROP_ISCSIOPTIONS: + case ZFS_PROP_XATTR: + case ZFS_PROP_VSCAN: + case ZFS_PROP_NBMAND: + case ZFS_PROP_MLSLABEL: + (void) snprintf(errbuf, sizeof (errbuf), + "property '%s' not supported on FreeBSD", propname); + ret = zfs_error(hdl, EZFS_PERM, errbuf); + goto error; + } + + if (prop == ZFS_PROP_VOLSIZE) { + if ((added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1) + goto error; + } + + if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) + goto error; + + if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "child dataset with inherited mountpoint is used " + "in a non-global zone")); + ret = zfs_error(hdl, EZFS_ZONED, errbuf); + goto error; + } + + /* + * We don't want to unmount & remount the dataset when changing + * its canmount property to 'on' or 'noauto'. We only use + * the changelist logic to unmount when setting canmount=off. 
+ */ + if (prop == ZFS_PROP_CANMOUNT) { + uint64_t idx; + int err = zprop_string_to_index(prop, propval, &idx, + ZFS_TYPE_DATASET); + if (err == 0 && idx != ZFS_CANMOUNT_OFF) + do_prefix = B_FALSE; + } + + if (do_prefix && (ret = changelist_prefix(cl)) != 0) + goto error; + + /* + * Execute the corresponding ioctl() to set this property. + */ + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) + goto error; + + ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); + + if (ret != 0) { + zfs_setprop_error(hdl, prop, errno, errbuf); + if (added_resv && errno == ENOSPC) { + /* clean up the volsize property we tried to set */ + uint64_t old_volsize = zfs_prop_get_int(zhp, + ZFS_PROP_VOLSIZE); + nvlist_free(nvl); + zcmd_free_nvlists(&zc); + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + goto error; + if (nvlist_add_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), + old_volsize) != 0) + goto error; + if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) + goto error; + (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); + } + } else { + if (do_prefix) + ret = changelist_postfix(cl); + + /* + * Refresh the statistics so the new property value + * is reflected. + */ + if (ret == 0) + (void) get_stats(zhp); + } + +error: + nvlist_free(nvl); + zcmd_free_nvlists(&zc); + if (cl) + changelist_free(cl); + return (ret); +} + +/* + * Given a property, inherit the value from the parent dataset, or if received + * is TRUE, revert to the received value, if any. 
+ */ +int +zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) +{ + zfs_cmd_t zc = { 0 }; + int ret; + prop_changelist_t *cl; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + zfs_prop_t prop; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot inherit %s for '%s'"), propname, zhp->zfs_name); + + zc.zc_cookie = received; + if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) { + /* + * For user properties, the amount of work we have to do is very + * small, so just do it here. + */ + if (!zfs_prop_user(propname)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid property")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); + + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0) + return (zfs_standard_error(hdl, errno, errbuf)); + + return (0); + } + + /* + * Verify that this property is inheritable. + */ + if (zfs_prop_readonly(prop)) + return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); + + if (!zfs_prop_inheritable(prop) && !received) + return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); + + /* + * Check to see if the value applies to this type + */ + if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) + return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); + + /* + * Normalize the name, to get rid of shorthand abbreviations. + */ + propname = zfs_prop_to_name(prop); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); + + if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && + zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is used in a non-global zone")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); + } + + /* + * Determine datasets which will be affected by this change, if any. 
+ */ + if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) + return (-1); + + if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "child dataset with inherited mountpoint is used " + "in a non-global zone")); + ret = zfs_error(hdl, EZFS_ZONED, errbuf); + goto error; + } + + if ((ret = changelist_prefix(cl)) != 0) + goto error; + + if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) { + return (zfs_standard_error(hdl, errno, errbuf)); + } else { + + if ((ret = changelist_postfix(cl)) != 0) + goto error; + + /* + * Refresh the statistics so the new property is reflected. + */ + (void) get_stats(zhp); + } + +error: + changelist_free(cl); + return (ret); +} + +/* + * True DSL properties are stored in an nvlist. The following two functions + * extract them appropriately. + */ +static uint64_t +getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) +{ + nvlist_t *nv; + uint64_t value; + + *source = NULL; + if (nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(prop), &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); + (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); + } else { + verify(!zhp->zfs_props_table || + zhp->zfs_props_table[prop] == B_TRUE); + value = zfs_prop_default_numeric(prop); + *source = ""; + } + + return (value); +} + +static char * +getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) +{ + nvlist_t *nv; + char *value; + + *source = NULL; + if (nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(prop), &nv) == 0) { + verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); + (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); + } else { + verify(!zhp->zfs_props_table || + zhp->zfs_props_table[prop] == B_TRUE); + if ((value = (char *)zfs_prop_default_string(prop)) == NULL) + value = ""; + *source = ""; + } + + return (value); +} + +static boolean_t +zfs_is_recvd_props_mode(zfs_handle_t *zhp) +{ + return 
(zhp->zfs_props == zhp->zfs_recvd_props); +} + +static void +zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) +{ + *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; + zhp->zfs_props = zhp->zfs_recvd_props; +} + +static void +zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) +{ + zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; + *cookie = 0; +} + +/* + * Internal function for getting a numeric property. Both zfs_prop_get() and + * zfs_prop_get_int() are built using this interface. + * + * Certain properties can be overridden using 'mount -o'. In this case, scan + * the contents of the /etc/mnttab entry, searching for the appropriate options. + * If they differ from the on-disk values, report the current values and mark + * the source "temporary". + */ +static int +get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, + char **source, uint64_t *val) +{ + zfs_cmd_t zc = { 0 }; + nvlist_t *zplprops = NULL; + struct mnttab mnt; + char *mntopt_on = NULL; + char *mntopt_off = NULL; + boolean_t received = zfs_is_recvd_props_mode(zhp); + + *source = NULL; + + switch (prop) { + case ZFS_PROP_ATIME: + mntopt_on = MNTOPT_ATIME; + mntopt_off = MNTOPT_NOATIME; + break; + + case ZFS_PROP_DEVICES: + mntopt_on = MNTOPT_DEVICES; + mntopt_off = MNTOPT_NODEVICES; + break; + + case ZFS_PROP_EXEC: + mntopt_on = MNTOPT_EXEC; + mntopt_off = MNTOPT_NOEXEC; + break; + + case ZFS_PROP_READONLY: + mntopt_on = MNTOPT_RO; + mntopt_off = MNTOPT_RW; + break; + + case ZFS_PROP_SETUID: + mntopt_on = MNTOPT_SETUID; + mntopt_off = MNTOPT_NOSETUID; + break; + + case ZFS_PROP_XATTR: + mntopt_on = MNTOPT_XATTR; + mntopt_off = MNTOPT_NOXATTR; + break; + + case ZFS_PROP_NBMAND: + mntopt_on = MNTOPT_NBMAND; + mntopt_off = MNTOPT_NONBMAND; + break; + } + + /* + * Because looking up the mount options is potentially expensive + * (iterating over all of /etc/mnttab), we defer its calculation until + * we're looking up a property which requires its presence. 
+ */ + if (!zhp->zfs_mntcheck && + (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) { + libzfs_handle_t *hdl = zhp->zfs_hdl; + struct mnttab entry; + + if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) { + zhp->zfs_mntopts = zfs_strdup(hdl, + entry.mnt_mntopts); + if (zhp->zfs_mntopts == NULL) + return (-1); + } + + zhp->zfs_mntcheck = B_TRUE; + } + + if (zhp->zfs_mntopts == NULL) + mnt.mnt_mntopts = ""; + else + mnt.mnt_mntopts = zhp->zfs_mntopts; + + switch (prop) { + case ZFS_PROP_ATIME: + case ZFS_PROP_DEVICES: + case ZFS_PROP_EXEC: + case ZFS_PROP_READONLY: + case ZFS_PROP_SETUID: + case ZFS_PROP_XATTR: + case ZFS_PROP_NBMAND: + *val = getprop_uint64(zhp, prop, source); + + if (received) + break; + + if (hasmntopt(&mnt, mntopt_on) && !*val) { + *val = B_TRUE; + if (src) + *src = ZPROP_SRC_TEMPORARY; + } else if (hasmntopt(&mnt, mntopt_off) && *val) { + *val = B_FALSE; + if (src) + *src = ZPROP_SRC_TEMPORARY; + } + break; + + case ZFS_PROP_CANMOUNT: + case ZFS_PROP_VOLSIZE: + case ZFS_PROP_QUOTA: + case ZFS_PROP_REFQUOTA: + case ZFS_PROP_RESERVATION: + case ZFS_PROP_REFRESERVATION: + case ZFS_PROP_FILESYSTEM_LIMIT: + case ZFS_PROP_SNAPSHOT_LIMIT: + case ZFS_PROP_FILESYSTEM_COUNT: + case ZFS_PROP_SNAPSHOT_COUNT: + *val = getprop_uint64(zhp, prop, source); + + if (*source == NULL) { + /* not default, must be local */ + *source = zhp->zfs_name; + } + break; + + case ZFS_PROP_MOUNTED: + *val = (zhp->zfs_mntopts != NULL); + break; + + case ZFS_PROP_NUMCLONES: + *val = zhp->zfs_dmustats.dds_num_clones; + break; + + case ZFS_PROP_VERSION: + case ZFS_PROP_NORMALIZE: + case ZFS_PROP_UTF8ONLY: + case ZFS_PROP_CASE: + if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) || + zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) { + zcmd_free_nvlists(&zc); + return (-1); + } + if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, 
&zplprops) != 0 || + nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop), + val) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + if (zplprops) + nvlist_free(zplprops); + zcmd_free_nvlists(&zc); + break; + + case ZFS_PROP_INCONSISTENT: + *val = zhp->zfs_dmustats.dds_inconsistent; + break; + + default: + switch (zfs_prop_get_type(prop)) { + case PROP_TYPE_NUMBER: + case PROP_TYPE_INDEX: + *val = getprop_uint64(zhp, prop, source); + /* + * If we tried to use a default value for a + * readonly property, it means that it was not + * present. + */ + if (zfs_prop_readonly(prop) && + *source != NULL && (*source)[0] == '\0') { + *source = NULL; + } + break; + + case PROP_TYPE_STRING: + default: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "cannot get non-numeric property")); + return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, + dgettext(TEXT_DOMAIN, "internal error"))); + } + } + + return (0); +} + +/* + * Calculate the source type, given the raw source string. + */ +static void +get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, + char *statbuf, size_t statlen) +{ + if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY) + return; + + if (source == NULL) { + *srctype = ZPROP_SRC_NONE; + } else if (source[0] == '\0') { + *srctype = ZPROP_SRC_DEFAULT; + } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { + *srctype = ZPROP_SRC_RECEIVED; + } else { + if (strcmp(source, zhp->zfs_name) == 0) { + *srctype = ZPROP_SRC_LOCAL; + } else { + (void) strlcpy(statbuf, source, statlen); + *srctype = ZPROP_SRC_INHERITED; + } + } + +} + +int +zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, + size_t proplen, boolean_t literal) +{ + zfs_prop_t prop; + int err = 0; + + if (zhp->zfs_recvd_props == NULL) + if (get_recvd_props_ioctl(zhp) != 0) + return (-1); + + prop = zfs_name_to_prop(propname); + + if (prop != ZPROP_INVAL) { + uint64_t cookie; + if (!nvlist_exists(zhp->zfs_recvd_props, propname)) + return (-1); + 
zfs_set_recvd_props_mode(zhp, &cookie); + err = zfs_prop_get(zhp, prop, propbuf, proplen, + NULL, NULL, 0, literal); + zfs_unset_recvd_props_mode(zhp, &cookie); + } else { + nvlist_t *propval; + char *recvdval; + if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, + propname, &propval) != 0) + return (-1); + verify(nvlist_lookup_string(propval, ZPROP_VALUE, + &recvdval) == 0); + (void) strlcpy(propbuf, recvdval, proplen); + } + + return (err == 0 ? 0 : -1); +} + +static int +get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) +{ + nvlist_t *value; + nvpair_t *pair; + + value = zfs_get_clones_nvl(zhp); + if (value == NULL) + return (-1); + + propbuf[0] = '\0'; + for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; + pair = nvlist_next_nvpair(value, pair)) { + if (propbuf[0] != '\0') + (void) strlcat(propbuf, ",", proplen); + (void) strlcat(propbuf, nvpair_name(pair), proplen); + } + + return (0); +} + +struct get_clones_arg { + uint64_t numclones; + nvlist_t *value; + const char *origin; + char buf[ZFS_MAXNAMELEN]; +}; + +int +get_clones_cb(zfs_handle_t *zhp, void *arg) +{ + struct get_clones_arg *gca = arg; + + if (gca->numclones == 0) { + zfs_close(zhp); + return (0); + } + + if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), + NULL, NULL, 0, B_TRUE) != 0) + goto out; + if (strcmp(gca->buf, gca->origin) == 0) { + fnvlist_add_boolean(gca->value, zfs_get_name(zhp)); + gca->numclones--; + } + +out: + (void) zfs_iter_children(zhp, get_clones_cb, gca); + zfs_close(zhp); + return (0); +} + +nvlist_t * +zfs_get_clones_nvl(zfs_handle_t *zhp) +{ + nvlist_t *nv, *value; + + if (nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { + struct get_clones_arg gca; + + /* + * if this is a snapshot, then the kernel wasn't able + * to get the clones. Do it by slowly iterating. 
+ */ + if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) + return (NULL); + if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) + return (NULL); + if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { + nvlist_free(nv); + return (NULL); + } + + gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); + gca.value = value; + gca.origin = zhp->zfs_name; + + if (gca.numclones != 0) { + zfs_handle_t *root; + char pool[ZFS_MAXNAMELEN]; + char *cp = pool; + + /* get the pool name */ + (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); + (void) strsep(&cp, "/@"); + root = zfs_open(zhp->zfs_hdl, pool, + ZFS_TYPE_FILESYSTEM); + + (void) get_clones_cb(root, &gca); + } + + if (gca.numclones != 0 || + nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || + nvlist_add_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { + nvlist_free(nv); + nvlist_free(value); + return (NULL); + } + nvlist_free(nv); + nvlist_free(value); + verify(0 == nvlist_lookup_nvlist(zhp->zfs_props, + zfs_prop_to_name(ZFS_PROP_CLONES), &nv)); + } + + verify(nvlist_lookup_nvlist(nv, ZPROP_VALUE, &value) == 0); + + return (value); +} + +/* + * Retrieve a property from the given object. If 'literal' is specified, then + * numbers are left as exact values. Otherwise, numbers are converted to a + * human-readable form. + * + * Returns 0 on success, or -1 on error. + */ +int +zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, + zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal) +{ + char *source = NULL; + uint64_t val; + char *str; + const char *strval; + boolean_t received = zfs_is_recvd_props_mode(zhp); + + /* + * Check to see if this property applies to our object + */ + if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) + return (-1); + + if (received && zfs_prop_readonly(prop)) + return (-1); + + if (src) + *src = ZPROP_SRC_NONE; + + switch (prop) { + case ZFS_PROP_CREATION: + /* + * 'creation' is a time_t stored in the statistics. 
We convert + * this into a string unless 'literal' is specified. + */ + { + val = getprop_uint64(zhp, prop, &source); + time_t time = (time_t)val; + struct tm t; + + if (literal || + localtime_r(&time, &t) == NULL || + strftime(propbuf, proplen, "%a %b %e %k:%M %Y", + &t) == 0) + (void) snprintf(propbuf, proplen, "%llu", val); + } + break; + + case ZFS_PROP_MOUNTPOINT: + /* + * Getting the precise mountpoint can be tricky. + * + * - for 'none' or 'legacy', return those values. + * - for inherited mountpoints, we want to take everything + * after our ancestor and append it to the inherited value. + * + * If the pool has an alternate root, we want to prepend that + * root to any values we return. + */ + + str = getprop_string(zhp, prop, &source); + + if (str[0] == '/') { + char buf[MAXPATHLEN]; + char *root = buf; + const char *relpath; + + /* + * If we inherit the mountpoint, even from a dataset + * with a received value, the source will be the path of + * the dataset we inherit from. If source is + * ZPROP_SOURCE_VAL_RECVD, the received value is not + * inherited. + */ + if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { + relpath = ""; + } else { + relpath = zhp->zfs_name + strlen(source); + if (relpath[0] == '/') + relpath++; + } + + if ((zpool_get_prop(zhp->zpool_hdl, + ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL, + B_FALSE)) || (strcmp(root, "-") == 0)) + root[0] = '\0'; + /* + * Special case an alternate root of '/'. This will + * avoid having multiple leading slashes in the + * mountpoint path. + */ + if (strcmp(root, "/") == 0) + root++; + + /* + * If the mountpoint is '/' then skip over this + * if we are obtaining either an alternate root or + * an inherited mountpoint. + */ + if (str[1] == '\0' && (root[0] != '\0' || + relpath[0] != '\0')) + str++; + + if (relpath[0] == '\0') + (void) snprintf(propbuf, proplen, "%s%s", + root, str); + else + (void) snprintf(propbuf, proplen, "%s%s%s%s", + root, str, relpath[0] == '@' ? 
"" : "/", + relpath); + } else { + /* 'legacy' or 'none' */ + (void) strlcpy(propbuf, str, proplen); + } + + break; + + case ZFS_PROP_ORIGIN: + (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), + proplen); + /* + * If there is no parent at all, return failure to indicate that + * it doesn't apply to this dataset. + */ + if (propbuf[0] == '\0') + return (-1); + break; + + case ZFS_PROP_CLONES: + if (get_clones_string(zhp, propbuf, proplen) != 0) + return (-1); + break; + + case ZFS_PROP_QUOTA: + case ZFS_PROP_REFQUOTA: + case ZFS_PROP_RESERVATION: + case ZFS_PROP_REFRESERVATION: + + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); + + /* + * If quota or reservation is 0, we translate this into 'none' + * (unless literal is set), and indicate that it's the default + * value. Otherwise, we print the number nicely and indicate + * that its set locally. + */ + if (val == 0) { + if (literal) + (void) strlcpy(propbuf, "0", proplen); + else + (void) strlcpy(propbuf, "none", proplen); + } else { + if (literal) + (void) snprintf(propbuf, proplen, "%llu", + (u_longlong_t)val); + else + zfs_nicenum(val, propbuf, proplen); + } + break; + + case ZFS_PROP_FILESYSTEM_LIMIT: + case ZFS_PROP_SNAPSHOT_LIMIT: + case ZFS_PROP_FILESYSTEM_COUNT: + case ZFS_PROP_SNAPSHOT_COUNT: + + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); + + /* + * If limit is UINT64_MAX, we translate this into 'none' (unless + * literal is set), and indicate that it's the default value. + * Otherwise, we print the number nicely and indicate that it's + * set locally. 
+ */ + if (literal) { + (void) snprintf(propbuf, proplen, "%llu", + (u_longlong_t)val); + } else if (val == UINT64_MAX) { + (void) strlcpy(propbuf, "none", proplen); + } else { + zfs_nicenum(val, propbuf, proplen); + } + break; + + case ZFS_PROP_REFRATIO: + case ZFS_PROP_COMPRESSRATIO: + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); + (void) snprintf(propbuf, proplen, "%llu.%02llux", + (u_longlong_t)(val / 100), + (u_longlong_t)(val % 100)); + break; + + case ZFS_PROP_TYPE: + switch (zhp->zfs_type) { + case ZFS_TYPE_FILESYSTEM: + str = "filesystem"; + break; + case ZFS_TYPE_VOLUME: + str = "volume"; + break; + case ZFS_TYPE_SNAPSHOT: + str = "snapshot"; + break; + case ZFS_TYPE_BOOKMARK: + str = "bookmark"; + break; + default: + abort(); + } + (void) snprintf(propbuf, proplen, "%s", str); + break; + + case ZFS_PROP_MOUNTED: + /* + * The 'mounted' property is a pseudo-property that described + * whether the filesystem is currently mounted. Even though + * it's a boolean value, the typical values of "on" and "off" + * don't make sense, so we translate to "yes" and "no". + */ + if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, + src, &source, &val) != 0) + return (-1); + if (val) + (void) strlcpy(propbuf, "yes", proplen); + else + (void) strlcpy(propbuf, "no", proplen); + break; + + case ZFS_PROP_NAME: + /* + * The 'name' property is a pseudo-property derived from the + * dataset name. It is presented as a real property to simplify + * consumers. + */ + (void) strlcpy(propbuf, zhp->zfs_name, proplen); + break; + + case ZFS_PROP_MLSLABEL: + { +#ifdef sun + m_label_t *new_sl = NULL; + char *ascii = NULL; /* human readable label */ + + (void) strlcpy(propbuf, + getprop_string(zhp, prop, &source), proplen); + + if (literal || (strcasecmp(propbuf, + ZFS_MLSLABEL_DEFAULT) == 0)) + break; + + /* + * Try to translate the internal hex string to + * human-readable output. If there are any + * problems just use the hex string. 
+ */ + + if (str_to_label(propbuf, &new_sl, MAC_LABEL, + L_NO_CORRECTION, NULL) == -1) { + m_label_free(new_sl); + break; + } + + if (label_to_str(new_sl, &ascii, M_LABEL, + DEF_NAMES) != 0) { + if (ascii) + free(ascii); + m_label_free(new_sl); + break; + } + m_label_free(new_sl); + + (void) strlcpy(propbuf, ascii, proplen); + free(ascii); +#else /* !sun */ + propbuf[0] = '\0'; +#endif /* !sun */ + } + break; + + case ZFS_PROP_GUID: + /* + * GUIDs are stored as numbers, but they are identifiers. + * We don't want them to be pretty printed, because pretty + * printing mangles the ID into a truncated and useless value. + */ + if (get_numeric_property(zhp, prop, src, &source, &val) != 0) + return (-1); + (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); + break; + + default: + switch (zfs_prop_get_type(prop)) { + case PROP_TYPE_NUMBER: + if (get_numeric_property(zhp, prop, src, + &source, &val) != 0) + return (-1); + if (literal) + (void) snprintf(propbuf, proplen, "%llu", + (u_longlong_t)val); + else + zfs_nicenum(val, propbuf, proplen); + break; + + case PROP_TYPE_STRING: + (void) strlcpy(propbuf, + getprop_string(zhp, prop, &source), proplen); + break; + + case PROP_TYPE_INDEX: + if (get_numeric_property(zhp, prop, src, + &source, &val) != 0) + return (-1); + if (zfs_prop_index_to_string(prop, val, &strval) != 0) + return (-1); + (void) strlcpy(propbuf, strval, proplen); + break; + + default: + abort(); + } + } + + get_source(zhp, src, source, statbuf, statlen); + + return (0); +} + +/* + * Utility function to get the given numeric property. Does no validation that + * the given property is the appropriate type; should only be used with + * hard-coded property types. 
+ */ +uint64_t +zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) +{ + char *source; + uint64_t val; + + (void) get_numeric_property(zhp, prop, NULL, &source, &val); + + return (val); +} + +int +zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val) +{ + char buf[64]; + + (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val); + return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf)); +} + +/* + * Similar to zfs_prop_get(), but returns the value as an integer. + */ +int +zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, + zprop_source_t *src, char *statbuf, size_t statlen) +{ + char *source; + + /* + * Check to see if this property applies to our object + */ + if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) { + return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE, + dgettext(TEXT_DOMAIN, "cannot get property '%s'"), + zfs_prop_to_name(prop))); + } + + if (src) + *src = ZPROP_SRC_NONE; + + if (get_numeric_property(zhp, prop, src, &source, value) != 0) + return (-1); + + get_source(zhp, src, source, statbuf, statlen); + + return (0); +} + +static int +idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, + char **domainp, idmap_rid_t *ridp) +{ +#ifdef sun + idmap_get_handle_t *get_hdl = NULL; + idmap_stat status; + int err = EINVAL; + + if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS) + goto out; + + if (isuser) { + err = idmap_get_sidbyuid(get_hdl, id, + IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); + } else { + err = idmap_get_sidbygid(get_hdl, id, + IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); + } + if (err == IDMAP_SUCCESS && + idmap_get_mappings(get_hdl) == IDMAP_SUCCESS && + status == IDMAP_SUCCESS) + err = 0; + else + err = EINVAL; +out: + if (get_hdl) + idmap_get_destroy(get_hdl); + return (err); +#else /* !sun */ + assert(!"invalid code path"); + return (EINVAL); // silence compiler warning +#endif /* !sun */ +} + +/* + * convert the propname into parameters needed by kernel + * Eg: userquota@ahrens 
-> ZFS_PROP_USERQUOTA, "", 126829 + * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789 + */ +static int +userquota_propname_decode(const char *propname, boolean_t zoned, + zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) +{ + zfs_userquota_prop_t type; + char *cp, *end; + char *numericsid = NULL; + boolean_t isuser; + + domain[0] = '\0'; + *ridp = 0; + /* Figure out the property type ({user|group}{quota|space}) */ + for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { + if (strncmp(propname, zfs_userquota_prop_prefixes[type], + strlen(zfs_userquota_prop_prefixes[type])) == 0) + break; + } + if (type == ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + *typep = type; + + isuser = (type == ZFS_PROP_USERQUOTA || + type == ZFS_PROP_USERUSED); + + cp = strchr(propname, '@') + 1; + + if (strchr(cp, '@')) { +#ifdef sun + /* + * It's a SID name (eg "user@domain") that needs to be + * turned into S-1-domainID-RID. + */ + int flag = 0; + idmap_stat stat, map_stat; + uid_t pid; + idmap_rid_t rid; + idmap_get_handle_t *gh = NULL; + + stat = idmap_get_create(&gh); + if (stat != IDMAP_SUCCESS) { + idmap_get_destroy(gh); + return (ENOMEM); + } + if (zoned && getzoneid() == GLOBAL_ZONEID) + return (ENOENT); + if (isuser) { + stat = idmap_getuidbywinname(cp, NULL, flag, &pid); + if (stat < 0) + return (ENOENT); + stat = idmap_get_sidbyuid(gh, pid, flag, &numericsid, + &rid, &map_stat); + } else { + stat = idmap_getgidbywinname(cp, NULL, flag, &pid); + if (stat < 0) + return (ENOENT); + stat = idmap_get_sidbygid(gh, pid, flag, &numericsid, + &rid, &map_stat); + } + if (stat < 0) { + idmap_get_destroy(gh); + return (ENOENT); + } + stat = idmap_get_mappings(gh); + idmap_get_destroy(gh); + + if (stat < 0) { + return (ENOENT); + } + if (numericsid == NULL) + return (ENOENT); + cp = numericsid; + *ridp = rid; + /* will be further decoded below */ +#else /* !sun */ + return (ENOENT); +#endif /* !sun */ + } + + if (strncmp(cp, "S-1-", 4) == 0) { + 
/* It's a numeric SID (eg "S-1-234-567-89") */ + (void) strlcpy(domain, cp, domainlen); + errno = 0; + if (*ridp == 0) { + cp = strrchr(domain, '-'); + *cp = '\0'; + cp++; + *ridp = strtoull(cp, &end, 10); + } else { + end = ""; + } + if (numericsid) { + free(numericsid); + numericsid = NULL; + } + if (errno != 0 || *end != '\0') + return (EINVAL); + } else if (!isdigit(*cp)) { + /* + * It's a user/group name (eg "user") that needs to be + * turned into a uid/gid + */ + if (zoned && getzoneid() == GLOBAL_ZONEID) + return (ENOENT); + if (isuser) { + struct passwd *pw; + pw = getpwnam(cp); + if (pw == NULL) + return (ENOENT); + *ridp = pw->pw_uid; + } else { + struct group *gr; + gr = getgrnam(cp); + if (gr == NULL) + return (ENOENT); + *ridp = gr->gr_gid; + } + } else { + /* It's a user/group ID (eg "12345"). */ + uid_t id = strtoul(cp, &end, 10); + idmap_rid_t rid; + char *mapdomain; + + if (*end != '\0') + return (EINVAL); + if (id > MAXUID) { + /* It's an ephemeral ID. */ + if (idmap_id_to_numeric_domain_rid(id, isuser, + &mapdomain, &rid) != 0) + return (ENOENT); + (void) strlcpy(domain, mapdomain, domainlen); + *ridp = rid; + } else { + *ridp = id; + } + } + + ASSERT3P(numericsid, ==, NULL); + return (0); +} + +static int +zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue, zfs_userquota_prop_t *typep) +{ + int err; + zfs_cmd_t zc = { 0 }; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + err = userquota_propname_decode(propname, + zfs_prop_get_int(zhp, ZFS_PROP_ZONED), + typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid); + zc.zc_objset_type = *typep; + if (err) + return (err); + + err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc); + if (err) + return (err); + + *propvalue = zc.zc_cookie; + return (0); +} + +int +zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue) +{ + zfs_userquota_prop_t type; + + return 
(zfs_prop_get_userquota_common(zhp, propname, propvalue, + &type)); +} + +int +zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal) +{ + int err; + uint64_t propvalue; + zfs_userquota_prop_t type; + + err = zfs_prop_get_userquota_common(zhp, propname, &propvalue, + &type); + + if (err) + return (err); + + if (literal) { + (void) snprintf(propbuf, proplen, "%llu", propvalue); + } else if (propvalue == 0 && + (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) { + (void) strlcpy(propbuf, "none", proplen); + } else { + zfs_nicenum(propvalue, propbuf, proplen); + } + return (0); +} + +int +zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, + uint64_t *propvalue) +{ + int err; + zfs_cmd_t zc = { 0 }; + const char *snapname; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + snapname = strchr(propname, '@') + 1; + if (strchr(snapname, '@')) { + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + } else { + /* snapname is the short name, append it to zhp's fsname */ + char *cp; + + (void) strlcpy(zc.zc_value, zhp->zfs_name, + sizeof (zc.zc_value)); + cp = strchr(zc.zc_value, '@'); + if (cp != NULL) + *cp = '\0'; + (void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value)); + (void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value)); + } + + err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc); + if (err) + return (err); + + *propvalue = zc.zc_cookie; + return (0); +} + +int +zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, + char *propbuf, int proplen, boolean_t literal) +{ + int err; + uint64_t propvalue; + + err = zfs_prop_get_written_int(zhp, propname, &propvalue); + + if (err) + return (err); + + if (literal) { + (void) snprintf(propbuf, proplen, "%llu", propvalue); + } else { + zfs_nicenum(propvalue, propbuf, proplen); + } + return (0); +} + +/* + * Returns the name of the given zfs handle. 
+ */ +const char * +zfs_get_name(const zfs_handle_t *zhp) +{ + return (zhp->zfs_name); +} + +/* + * Returns the type of the given zfs handle. + */ +zfs_type_t +zfs_get_type(const zfs_handle_t *zhp) +{ + return (zhp->zfs_type); +} + +/* + * Is one dataset name a child dataset of another? + * + * Needs to handle these cases: + * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" + * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" + * Descendant? No. No. No. Yes. + */ +static boolean_t +is_descendant(const char *ds1, const char *ds2) +{ + size_t d1len = strlen(ds1); + + /* ds2 can't be a descendant if it's smaller */ + if (strlen(ds2) < d1len) + return (B_FALSE); + + /* otherwise, compare strings and verify that there's a '/' char */ + return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); +} + +/* + * Given a complete name, return just the portion that refers to the parent. + * Will return -1 if there is no parent (path is just the name of the + * pool). + */ +static int +parent_name(const char *path, char *buf, size_t buflen) +{ + char *slashp; + + (void) strlcpy(buf, path, buflen); + + if ((slashp = strrchr(buf, '/')) == NULL) + return (-1); + *slashp = '\0'; + + return (0); +} + +/* + * If accept_ancestor is false, then check to make sure that the given path has + * a parent, and that it exists. If accept_ancestor is true, then find the + * closest existing ancestor for the given path. In prefixlen return the + * length of already existing prefix of the given path. We also fetch the + * 'zoned' property, which is used to validate property settings when creating + * new datasets. 
+ */ +static int +check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, + boolean_t accept_ancestor, int *prefixlen) +{ + zfs_cmd_t zc = { 0 }; + char parent[ZFS_MAXNAMELEN]; + char *slash; + zfs_handle_t *zhp; + char errbuf[1024]; + uint64_t is_zoned; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); + + /* get parent, and check to see if this is just a pool */ + if (parent_name(path, parent, sizeof (parent)) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "missing dataset name")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + + /* check to see if the pool exists */ + if ((slash = strchr(parent, '/')) == NULL) + slash = parent + strlen(parent); + (void) strncpy(zc.zc_name, parent, slash - parent); + zc.zc_name[slash - parent] = '\0'; + if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 && + errno == ENOENT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no such pool '%s'"), zc.zc_name); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + + /* check to see if the parent dataset exists */ + while ((zhp = make_dataset_handle(hdl, parent)) == NULL) { + if (errno == ENOENT && accept_ancestor) { + /* + * Go deeper to find an ancestor, give up on top level. 
+ */ + if (parent_name(parent, parent, sizeof (parent)) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no such pool '%s'"), zc.zc_name); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + } else if (errno == ENOENT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent does not exist")); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } else + return (zfs_standard_error(hdl, errno, errbuf)); + } + + is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); + if (zoned != NULL) + *zoned = is_zoned; + + /* we are in a non-global zone, but parent is in the global zone */ + if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { + (void) zfs_standard_error(hdl, EPERM, errbuf); + zfs_close(zhp); + return (-1); + } + + /* make sure parent is a filesystem */ + if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent is not a filesystem")); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + zfs_close(zhp); + return (-1); + } + + zfs_close(zhp); + if (prefixlen != NULL) + *prefixlen = strlen(parent); + return (0); +} + +/* + * Finds whether the dataset of the given type(s) exists. + */ +boolean_t +zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) +{ + zfs_handle_t *zhp; + + if (!zfs_validate_name(hdl, path, types, B_FALSE)) + return (B_FALSE); + + /* + * Try to get stats for the dataset, which will tell us if it exists. + */ + if ((zhp = make_dataset_handle(hdl, path)) != NULL) { + int ds_type = zhp->zfs_type; + + zfs_close(zhp); + if (types & ds_type) + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Given a path to 'target', create all the ancestors between + * the prefixlen portion of the path, and the target itself. + * Fail if the initial prefixlen-ancestor does not already exist. 
+ */ +int +create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) +{ + zfs_handle_t *h; + char *cp; + const char *opname; + + /* make sure prefix exists */ + cp = target + prefixlen; + if (*cp != '/') { + assert(strchr(cp, '/') == NULL); + h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); + } else { + *cp = '\0'; + h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); + *cp = '/'; + } + if (h == NULL) + return (-1); + zfs_close(h); + + /* + * Attempt to create, mount, and share any ancestor filesystems, + * up to the prefixlen-long one. + */ + for (cp = target + prefixlen + 1; + cp = strchr(cp, '/'); *cp = '/', cp++) { + + *cp = '\0'; + + h = make_dataset_handle(hdl, target); + if (h) { + /* it already exists, nothing to do here */ + zfs_close(h); + continue; + } + + if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, + NULL) != 0) { + opname = dgettext(TEXT_DOMAIN, "create"); + goto ancestorerr; + } + + h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); + if (h == NULL) { + opname = dgettext(TEXT_DOMAIN, "open"); + goto ancestorerr; + } + + if (zfs_mount(h, NULL, 0) != 0) { + opname = dgettext(TEXT_DOMAIN, "mount"); + goto ancestorerr; + } + + if (zfs_share(h) != 0) { + opname = dgettext(TEXT_DOMAIN, "share"); + goto ancestorerr; + } + + zfs_close(h); + } + + return (0); + +ancestorerr: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to %s ancestor '%s'"), opname, target); + return (-1); +} + +/* + * Creates non-existing ancestors of the given path. + */ +int +zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) +{ + int prefix; + char *path_copy; + int rc; + + if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) + return (-1); + + if ((path_copy = strdup(path)) != NULL) { + rc = create_parents(hdl, path_copy, prefix); + free(path_copy); + } + if (path_copy == NULL || rc != 0) + return (-1); + + return (0); +} + +/* + * Create a new filesystem or volume. 
+ */ +int +zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, + nvlist_t *props) +{ + int ret; + uint64_t size = 0; + uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + char errbuf[1024]; + uint64_t zoned; + dmu_objset_type_t ost; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot create '%s'"), path); + + /* validate the path, taking care to note the extended error message */ + if (!zfs_validate_name(hdl, path, type, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + + /* validate parents exist */ + if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0) + return (-1); + + /* + * The failure modes when creating a dataset of a different type over + * one that already exists is a little strange. In particular, if you + * try to create a dataset on top of an existing dataset, the ioctl() + * will return ENOENT, not EEXIST. To prevent this from happening, we + * first try to see if the dataset exists. + */ + if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset already exists")); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + + if (type == ZFS_TYPE_VOLUME) + ost = DMU_OST_ZVOL; + else + ost = DMU_OST_ZFS; + + if (props && (props = zfs_valid_proplist(hdl, type, props, + zoned, NULL, errbuf)) == 0) + return (-1); + + if (type == ZFS_TYPE_VOLUME) { + /* + * If we are creating a volume, the size and block size must + * satisfy a few restraints. First, the blocksize must be a + * valid block size between SPA_{MIN,MAX}BLOCKSIZE. Second, the + * volsize must be a multiple of the block size, and cannot be + * zero. 
+ */ + if (props == NULL || nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) { + nvlist_free(props); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "missing volume size")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + + if ((ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &blocksize)) != 0) { + if (ret == ENOENT) { + blocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); + } else { + nvlist_free(props); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "missing volume block size")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + } + + if (size == 0) { + nvlist_free(props); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "volume size cannot be zero")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + + if (size % blocksize != 0) { + nvlist_free(props); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "volume size must be a multiple of volume block " + "size")); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + } + + /* create the dataset */ + ret = lzc_create(path, ost, props); + nvlist_free(props); + + /* check for failure */ + if (ret != 0) { + char parent[ZFS_MAXNAMELEN]; + (void) parent_name(path, parent, sizeof (parent)); + + switch (errno) { + case ENOENT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no such parent '%s'"), parent); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EINVAL: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent '%s' is not a filesystem"), parent); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + + case EDOM: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "volume block size must be power of 2 from " + "512B to 128KB")); + + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded to set this " + "property or value")); + return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); +#ifdef _ILP32 + case EOVERFLOW: + /* + * This platform can't address a volume this big. 
+ */ + if (type == ZFS_TYPE_VOLUME) + return (zfs_error(hdl, EZFS_VOLTOOBIG, + errbuf)); +#endif + /* FALLTHROUGH */ + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + return (0); +} + +/* + * Destroys the given dataset. The caller must make sure that the filesystem + * isn't mounted, and that there are no active dependents. If the file system + * does not exist this function does nothing. + */ +int +zfs_destroy(zfs_handle_t *zhp, boolean_t defer) +{ + zfs_cmd_t zc = { 0 }; + + if (zhp->zfs_type == ZFS_TYPE_BOOKMARK) { + nvlist_t *nv = fnvlist_alloc(); + fnvlist_add_boolean(nv, zhp->zfs_name); + int error = lzc_destroy_bookmarks(nv, NULL); + fnvlist_free(nv); + if (error != 0) { + return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), + zhp->zfs_name)); + } + return (0); + } + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (ZFS_IS_VOLUME(zhp)) { + zc.zc_objset_type = DMU_OST_ZVOL; + } else { + zc.zc_objset_type = DMU_OST_ZFS; + } + + zc.zc_defer_destroy = defer; + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0 && + errno != ENOENT) { + return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), + zhp->zfs_name)); + } + + remove_mountpoint(zhp); + + return (0); +} + +struct destroydata { + nvlist_t *nvl; + const char *snapname; +}; + +static int +zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) +{ + struct destroydata *dd = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, dd->snapname); + + if (lzc_exists(name)) + verify(nvlist_add_boolean(dd->nvl, name) == 0); + + rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); + zfs_close(zhp); + return (rv); +} + +/* + * Destroys all snapshots with the given name in zhp & descendants. 
+ *
+ * Returns the zfs_standard_error_fmt() result for ENOENT when no dataset in
+ * the hierarchy has such a snapshot, otherwise whatever
+ * zfs_destroy_snaps_nvl() returns.
+ */
+int
+zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
+{
+	int ret;
+	struct destroydata dd = { 0 };
+
+	dd.snapname = snapname;
+	verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0);
+	/* the callback closes the handle it is given, so pass a duplicate */
+	(void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd);
+
+	if (nvlist_empty(dd.nvl)) {
+		ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
+		    zhp->zfs_name, snapname);
+	} else {
+		ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer);
+	}
+	nvlist_free(dd.nvl);
+	return (ret);
+}
+
+/*
+ * Destroys all the snapshots named in the nvlist.
+ *
+ * Returns 0 on success.  On failure one error is reported per entry of the
+ * error list produced by lzc_destroy_snaps() (or a single generic error when
+ * that list is empty) and the result of the last report is returned.
+ */
+int
+zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
+{
+	int ret;
+	nvlist_t *errlist = NULL;
+	nvpair_t *pair;
+
+	ret = lzc_destroy_snaps(snaps, defer, &errlist);
+
+	if (ret == 0) {
+		nvlist_free(errlist);
+		return (0);
+	}
+
+	if (errlist == NULL || nvlist_empty(errlist)) {
+		char errbuf[1024];
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
+
+		ret = zfs_standard_error(hdl, ret, errbuf);
+	}
+
+	pair = NULL;
+	if (errlist != NULL)
+		pair = nvlist_next_nvpair(errlist, NULL);
+	for (; pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
+		char errbuf[1024];
+		/* each pair maps a snapshot name to its own error code */
+		int snap_err = fnvpair_value_int32(pair);
+
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
+		    nvpair_name(pair));
+
+		switch (snap_err) {
+		case EEXIST:
+			zfs_error_aux(hdl,
+			    dgettext(TEXT_DOMAIN, "snapshot is cloned"));
+			ret = zfs_error(hdl, EZFS_EXISTS, errbuf);
+			break;
+		default:
+			/*
+			 * Use the code recorded for this snapshot; the global
+			 * errno is unrelated to it by now.
+			 */
+			ret = zfs_standard_error(hdl, snap_err, errbuf);
+			break;
+		}
+	}
+
+	nvlist_free(errlist);
+	return (ret);
+}
+
+/*
+ * Clones the given dataset.  The target must be of the same type as the source.
+ */
+int
+zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	char parent[ZFS_MAXNAMELEN];
+	uint64_t zoned;
+	int ret;
+	int err;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), target);
+
+	/* the clone's name must be a valid filesystem name */
+	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/* its parent must already exist */
+	if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0)
+		return (-1);
+
+	/* remembered only for the ENOENT message below */
+	(void) parent_name(target, parent, sizeof (parent));
+
+	if (props) {
+		/* validate the properties against the kind of the source */
+		zfs_type_t type = ZFS_IS_VOLUME(zhp) ?
+		    ZFS_TYPE_VOLUME : ZFS_TYPE_FILESYSTEM;
+
+		props = zfs_valid_proplist(hdl, type, props, zoned,
+		    zhp, errbuf);
+		if (props == NULL)
+			return (-1);
+	}
+
+	/* do the clone */
+	ret = lzc_clone(target, zhp->zfs_name, props);
+	nvlist_free(props);
+
+	if (ret == 0)
+		return (ret);
+
+	err = errno;
+
+	if (err == ENOENT) {
+		/*
+		 * The parent doesn't exist.  We should have caught this
+		 * above, but there may a race condition that has since
+		 * destroyed the parent.
+		 *
+		 * At this point, we don't know whether it's the source
+		 * that doesn't exist anymore, or whether the target
+		 * dataset doesn't exist.
+		 */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no such parent '%s'"), parent);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	if (err == EXDEV) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "source and target pools differ"));
+		return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+	}
+
+	return (zfs_standard_error(hdl, err, errbuf));
+}
+
+/*
+ * Promotes the given clone fs to be the clone parent.
+ */ +int +zfs_promote(zfs_handle_t *zhp) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_cmd_t zc = { 0 }; + char parent[MAXPATHLEN]; + int ret; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot promote '%s'"), zhp->zfs_name); + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots can not be promoted")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + } + + (void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent)); + if (parent[0] == '\0') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not a cloned filesystem")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + } + + (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, + sizeof (zc.zc_value)); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + + if (ret != 0) { + int save_errno = errno; + + switch (save_errno) { + case EEXIST: + /* There is a conflicting snapshot name. */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "conflicting snapshot '%s' from parent '%s'"), + zc.zc_string, parent); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + + default: + return (zfs_standard_error(hdl, save_errno, errbuf)); + } + } + return (ret); +} + +typedef struct snapdata { + nvlist_t *sd_nvl; + const char *sd_snapname; +} snapdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snapdata_t *sd = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) == 0) { + (void) snprintf(name, sizeof (name), + "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + + fnvlist_add_boolean(sd->sd_nvl, name); + + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + } + zfs_close(zhp); + + return (rv); +} + +/* + * Creates snapshots. The keys in the snaps nvlist are the snapshots to be + * created. 
+ */ +int +zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) +{ + int ret; + char errbuf[1024]; + nvpair_t *elem; + nvlist_t *errors; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot create snapshots ")); + + elem = NULL; + while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { + const char *snapname = nvpair_name(elem); + + /* validate the target name */ + if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, + B_TRUE)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), snapname); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + } + + if (props != NULL && + (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, + props, B_FALSE, NULL, errbuf)) == NULL) { + return (-1); + } + + ret = lzc_snapshot(snaps, props, &errors); + + if (ret != 0) { + boolean_t printed = B_FALSE; + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), nvpair_name(elem)); + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); + printed = B_TRUE; + } + if (!printed) { + switch (ret) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "multiple snapshots of same " + "fs not allowed")); + (void) zfs_error(hdl, EZFS_EXISTS, errbuf); + + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } + } + + nvlist_free(props); + nvlist_free(errors); + return (ret); +} + +int +zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, + nvlist_t *props) +{ + int ret; + snapdata_t sd = { 0 }; + char fsname[ZFS_MAXNAMELEN]; + char *cp; + zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot snapshot %s"), path); + + if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, 
errbuf)); + + (void) strlcpy(fsname, path, sizeof (fsname)); + cp = strchr(fsname, '@'); + *cp = '\0'; + sd.sd_snapname = cp + 1; + + if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME)) == NULL) { + return (-1); + } + + verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0); + if (recursive) { + (void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); + } else { + fnvlist_add_boolean(sd.sd_nvl, path); + } + + ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); + zfs_close(zhp); + return (ret); +} + +/* + * Destroy any more recent snapshots. We invoke this callback on any dependents + * of the snapshot first. If the 'cb_dependent' member is non-zero, then this + * is a dependent and we should just destroy it without checking the transaction + * group. + */ +typedef struct rollback_data { + const char *cb_target; /* the snapshot */ + uint64_t cb_create; /* creation time reference */ + boolean_t cb_error; + boolean_t cb_force; +} rollback_data_t; + +static int +rollback_destroy_dependent(zfs_handle_t *zhp, void *data) +{ + rollback_data_t *cbp = data; + prop_changelist_t *clp; + + /* We must destroy this clone; first unmount it */ + clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, + cbp->cb_force ? 
MS_FORCE: 0); + if (clp == NULL || changelist_prefix(clp) != 0) { + cbp->cb_error = B_TRUE; + zfs_close(zhp); + return (0); + } + if (zfs_destroy(zhp, B_FALSE) != 0) + cbp->cb_error = B_TRUE; + else + changelist_remove(clp, zhp->zfs_name); + (void) changelist_postfix(clp); + changelist_free(clp); + + zfs_close(zhp); + return (0); +} + +static int +rollback_destroy(zfs_handle_t *zhp, void *data) +{ + rollback_data_t *cbp = data; + + if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { + cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, + rollback_destroy_dependent, cbp); + + cbp->cb_error |= zfs_destroy(zhp, B_FALSE); + } + + zfs_close(zhp); + return (0); +} + +/* + * Given a dataset, rollback to a specific snapshot, discarding any + * data changes since then and making it the active dataset. + * + * Any snapshots and bookmarks more recent than the target are + * destroyed, along with their dependents (i.e. clones). + */ +int +zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) +{ + rollback_data_t cb = { 0 }; + int err; + boolean_t restore_resv = 0; + uint64_t old_volsize, new_volsize; + zfs_prop_t resv_prop; + + assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM || + zhp->zfs_type == ZFS_TYPE_VOLUME); + + /* + * Destroy all recent snapshots and their dependents. + */ + cb.cb_force = force; + cb.cb_target = snap->zfs_name; + cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); + (void) zfs_iter_snapshots(zhp, B_FALSE, rollback_destroy, &cb); + (void) zfs_iter_bookmarks(zhp, rollback_destroy, &cb); + + if (cb.cb_error) + return (-1); + + /* + * Now that we have verified that the snapshot is the latest, + * rollback to the given snapshot. 
+ */ + + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + if (zfs_which_resv_prop(zhp, &resv_prop) < 0) + return (-1); + old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); + restore_resv = + (old_volsize == zfs_prop_get_int(zhp, resv_prop)); + } + + /* + * We rely on zfs_iter_children() to verify that there are no + * newer snapshots for the given dataset. Therefore, we can + * simply pass the name on to the ioctl() call. There is still + * an unlikely race condition where the user has taken a + * snapshot since we verified that this was the most recent. + */ + err = lzc_rollback(zhp->zfs_name, NULL, 0); + if (err != 0) { + (void) zfs_standard_error_fmt(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), + zhp->zfs_name); + return (err); + } + + /* + * For volumes, if the pre-rollback volsize matched the pre- + * rollback reservation and the volsize has changed then set + * the reservation property to the post-rollback volsize. + * Make a new handle since the rollback closed the dataset. + */ + if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && + (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { + if (restore_resv) { + new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); + if (old_volsize != new_volsize) + err = zfs_prop_set_int(zhp, resv_prop, + new_volsize); + } + zfs_close(zhp); + } + return (err); +} + +/* + * Renames the given dataset. 
+ */ +int +zfs_rename(zfs_handle_t *zhp, const char *source, const char *target, + renameflags_t flags) +{ + int ret; + zfs_cmd_t zc = { 0 }; + char *delim; + prop_changelist_t *cl = NULL; + zfs_handle_t *zhrp = NULL; + char *parentname = NULL; + char parent[ZFS_MAXNAMELEN]; + char property[ZFS_MAXPROPLEN]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + + /* if we have the same exact name, just return success */ + if (strcmp(zhp->zfs_name, target) == 0) + return (0); + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot rename to '%s'"), target); + + if (source != NULL) { + /* + * This is recursive snapshots rename, put snapshot name + * (that might not exist) into zfs_name. + */ + assert(flags.recurse); + + (void) strlcat(zhp->zfs_name, "@", sizeof(zhp->zfs_name)); + (void) strlcat(zhp->zfs_name, source, sizeof(zhp->zfs_name)); + zhp->zfs_type = ZFS_TYPE_SNAPSHOT; + } + + /* + * Make sure the target name is valid + */ + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { + if ((strchr(target, '@') == NULL) || + *target == '@') { + /* + * Snapshot target name is abbreviated, + * reconstruct full dataset name + */ + (void) strlcpy(parent, zhp->zfs_name, + sizeof (parent)); + delim = strchr(parent, '@'); + if (strchr(target, '@') == NULL) + *(++delim) = '\0'; + else + *delim = '\0'; + (void) strlcat(parent, target, sizeof (parent)); + target = parent; + } else { + /* + * Make sure we're renaming within the same dataset. 
+ */ + delim = strchr(target, '@'); + if (strncmp(zhp->zfs_name, target, delim - target) + != 0 || zhp->zfs_name[delim - target] != '@') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots must be part of same " + "dataset")); + return (zfs_error(hdl, EZFS_CROSSTARGET, + errbuf)); + } + } + if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } else { + if (flags.recurse) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "recursive rename must be a snapshot")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + } + + if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + + /* validate parents */ + if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) + return (-1); + + /* make sure we're in the same pool */ + verify((delim = strchr(target, '/')) != NULL); + if (strncmp(zhp->zfs_name, target, delim - target) != 0 || + zhp->zfs_name[delim - target] != '/') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "datasets must be within same pool")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + } + + /* new name cannot be a child of the current dataset name */ + if (is_descendant(zhp->zfs_name, target)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "New dataset name cannot be a descendant of " + "current dataset name")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + } + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); + + if (getzoneid() == GLOBAL_ZONEID && + zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is used in a non-global zone")); + return (zfs_error(hdl, EZFS_ZONED, errbuf)); + } + + /* + * Avoid unmounting file systems with mountpoint property set to + * 'legacy' or 'none' even if -u option is not given. 
+ */ + if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && + !flags.recurse && !flags.nounmount && + zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, property, + sizeof (property), NULL, NULL, 0, B_FALSE) == 0 && + (strcmp(property, "legacy") == 0 || + strcmp(property, "none") == 0)) { + flags.nounmount = B_TRUE; + } + if (flags.recurse) { + + parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); + if (parentname == NULL) { + ret = -1; + goto error; + } + delim = strchr(parentname, '@'); + *delim = '\0'; + zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); + if (zhrp == NULL) { + ret = -1; + goto error; + } + } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) { + if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, + flags.nounmount ? CL_GATHER_DONT_UNMOUNT : 0, + flags.forceunmount ? MS_FORCE : 0)) == NULL) { + return (-1); + } + + if (changelist_haszonedchild(cl)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "child dataset with inherited mountpoint is used " + "in a non-global zone")); + (void) zfs_error(hdl, EZFS_ZONED, errbuf); + goto error; + } + + if ((ret = changelist_prefix(cl)) != 0) + goto error; + } + + if (ZFS_IS_VOLUME(zhp)) + zc.zc_objset_type = DMU_OST_ZVOL; + else + zc.zc_objset_type = DMU_OST_ZFS; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); + + zc.zc_cookie = flags.recurse ? 
1 : 0; + if (flags.nounmount) + zc.zc_cookie |= 2; + + if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) { + /* + * if it was recursive, the one that actually failed will + * be in zc.zc_name + */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot rename '%s'"), zc.zc_name); + + if (flags.recurse && errno == EEXIST) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "a child dataset already has a snapshot " + "with the new name")); + (void) zfs_error(hdl, EZFS_EXISTS, errbuf); + } else { + (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); + } + + /* + * On failure, we still want to remount any filesystems that + * were previously mounted, so we don't alter the system state. + */ + if (cl != NULL) + (void) changelist_postfix(cl); + } else { + if (cl != NULL) { + changelist_rename(cl, zfs_get_name(zhp), target); + ret = changelist_postfix(cl); + } + } + +error: + if (parentname != NULL) { + free(parentname); + } + if (zhrp != NULL) { + zfs_close(zhrp); + } + if (cl != NULL) { + changelist_free(cl); + } + return (ret); +} + +nvlist_t * +zfs_get_user_props(zfs_handle_t *zhp) +{ + return (zhp->zfs_user_props); +} + +nvlist_t * +zfs_get_recvd_props(zfs_handle_t *zhp) +{ + if (zhp->zfs_recvd_props == NULL) + if (get_recvd_props_ioctl(zhp) != 0) + return (NULL); + return (zhp->zfs_recvd_props); +} + +/* + * This function is used by 'zfs list' to determine the exact set of columns to + * display, and their maximum widths. This does two main things: + * + * - If this is a list of all properties, then expand the list to include + * all native properties, and set a flag so that for each dataset we look + * for new unique user properties and add them to the list. + * + * - For non fixed-width properties, keep track of the maximum width seen + * so that we can size the column appropriately. If the user has + * requested received property values, we also need to compute the width + * of the RECEIVED column. 
+ */ +int +zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received, + boolean_t literal) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zprop_list_t *entry; + zprop_list_t **last, **start; + nvlist_t *userprops, *propval; + nvpair_t *elem; + char *strval; + char buf[ZFS_MAXPROPLEN]; + + if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0) + return (-1); + + userprops = zfs_get_user_props(zhp); + + entry = *plp; + if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) { + /* + * Go through and add any user properties as necessary. We + * start by incrementing our list pointer to the first + * non-native property. + */ + start = plp; + while (*start != NULL) { + if ((*start)->pl_prop == ZPROP_INVAL) + break; + start = &(*start)->pl_next; + } + + elem = NULL; + while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) { + /* + * See if we've already found this property in our list. + */ + for (last = start; *last != NULL; + last = &(*last)->pl_next) { + if (strcmp((*last)->pl_user_prop, + nvpair_name(elem)) == 0) + break; + } + + if (*last == NULL) { + if ((entry = zfs_alloc(hdl, + sizeof (zprop_list_t))) == NULL || + ((entry->pl_user_prop = zfs_strdup(hdl, + nvpair_name(elem)))) == NULL) { + free(entry); + return (-1); + } + + entry->pl_prop = ZPROP_INVAL; + entry->pl_width = strlen(nvpair_name(elem)); + entry->pl_all = B_TRUE; + *last = entry; + } + } + } + + /* + * Now go through and check the width of any non-fixed columns + */ + for (entry = *plp; entry != NULL; entry = entry->pl_next) { + if (entry->pl_fixed && !literal) + continue; + + if (entry->pl_prop != ZPROP_INVAL) { + if (zfs_prop_get(zhp, entry->pl_prop, + buf, sizeof (buf), NULL, NULL, 0, literal) == 0) { + if (strlen(buf) > entry->pl_width) + entry->pl_width = strlen(buf); + } + if (received && zfs_prop_get_recvd(zhp, + zfs_prop_to_name(entry->pl_prop), + buf, sizeof (buf), literal) == 0) + if (strlen(buf) > entry->pl_recvd_width) + entry->pl_recvd_width = 
strlen(buf); + } else { + if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, + &propval) == 0) { + verify(nvlist_lookup_string(propval, + ZPROP_VALUE, &strval) == 0); + if (strlen(strval) > entry->pl_width) + entry->pl_width = strlen(strval); + } + if (received && zfs_prop_get_recvd(zhp, + entry->pl_user_prop, + buf, sizeof (buf), literal) == 0) + if (strlen(buf) > entry->pl_recvd_width) + entry->pl_recvd_width = strlen(buf); + } + } + + return (0); +} + +int +zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, + char *resource, void *export, void *sharetab, + int sharemax, zfs_share_op_t operation) +{ + zfs_cmd_t zc = { 0 }; + int error; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); + if (resource) + (void) strlcpy(zc.zc_string, resource, sizeof (zc.zc_string)); + zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab; + zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export; + zc.zc_share.z_sharetype = operation; + zc.zc_share.z_sharemax = sharemax; + error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc); + return (error); +} + +void +zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) +{ + nvpair_t *curr; + + /* + * Keep a reference to the props-table against which we prune the + * properties. + */ + zhp->zfs_props_table = props; + + curr = nvlist_next_nvpair(zhp->zfs_props, NULL); + + while (curr) { + zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr)); + nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr); + + /* + * User properties will result in ZPROP_INVAL, and since we + * only know how to prune standard ZFS properties, we always + * leave these in the list. This can also happen if we + * encounter an unknown DSL property (when running older + * software, for example). 
+ */ + if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE) + (void) nvlist_remove(zhp->zfs_props, + nvpair_name(curr), nvpair_type(curr)); + curr = next; + } +} + +#ifdef sun +static int +zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, + zfs_smb_acl_op_t cmd, char *resource1, char *resource2) +{ + zfs_cmd_t zc = { 0 }; + nvlist_t *nvlist = NULL; + int error; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); + zc.zc_cookie = (uint64_t)cmd; + + if (cmd == ZFS_SMB_ACL_RENAME) { + if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) { + (void) no_memory(hdl); + return (NULL); + } + } + + switch (cmd) { + case ZFS_SMB_ACL_ADD: + case ZFS_SMB_ACL_REMOVE: + (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string)); + break; + case ZFS_SMB_ACL_RENAME: + if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC, + resource1) != 0) { + (void) no_memory(hdl); + return (-1); + } + if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET, + resource2) != 0) { + (void) no_memory(hdl); + return (-1); + } + if (zcmd_write_src_nvlist(hdl, &zc, nvlist) != 0) { + nvlist_free(nvlist); + return (-1); + } + break; + case ZFS_SMB_ACL_PURGE: + break; + default: + return (-1); + } + error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc); + if (nvlist) + nvlist_free(nvlist); + return (error); +} + +int +zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset, + char *path, char *resource) +{ + return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD, + resource, NULL)); +} + +int +zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset, + char *path, char *resource) +{ + return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE, + resource, NULL)); +} + +int +zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path) +{ + return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE, + NULL, NULL)); +} + +int +zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path, + char *oldname, char 
*newname) +{ + return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME, + oldname, newname)); +} +#endif /* sun */ + +int +zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, + zfs_userspace_cb_t func, void *arg) +{ + zfs_cmd_t zc = { 0 }; + zfs_useracct_t buf[100]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + int ret; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + zc.zc_objset_type = type; + zc.zc_nvlist_dst = (uintptr_t)buf; + + for (;;) { + zfs_useracct_t *zua = buf; + + zc.zc_nvlist_dst_size = sizeof (buf); + if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot get used/quota for %s"), zc.zc_name); + return (zfs_standard_error_fmt(hdl, errno, errbuf)); + } + if (zc.zc_nvlist_dst_size == 0) + break; + + while (zc.zc_nvlist_dst_size > 0) { + if ((ret = func(arg, zua->zu_domain, zua->zu_rid, + zua->zu_space)) != 0) + return (ret); + zua++; + zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); + } + } + + return (0); +} + +struct holdarg { + nvlist_t *nvl; + const char *snapname; + const char *tag; + boolean_t recursive; + int error; +}; + +static int +zfs_hold_one(zfs_handle_t *zhp, void *arg) +{ + struct holdarg *ha = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, ha->snapname); + + if (lzc_exists(name)) + fnvlist_add_string(ha->nvl, name, ha->tag); + + if (ha->recursive) + rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); + zfs_close(zhp); + return (rv); +} + +int +zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, + boolean_t recursive, int cleanup_fd) +{ + int ret; + struct holdarg ha; + + ha.nvl = fnvlist_alloc(); + ha.snapname = snapname; + ha.tag = tag; + ha.recursive = recursive; + (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); + + if (nvlist_empty(ha.nvl)) { + char errbuf[1024]; + + fnvlist_free(ha.nvl); + ret = ENOENT; + (void) 
snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); + return (ret); + } + + ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); + fnvlist_free(ha.nvl); + + return (ret); +} + +int +zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) +{ + int ret; + nvlist_t *errors; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; + + errors = NULL; + ret = lzc_hold(holds, cleanup_fd, &errors); + + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); + return (0); + } + + if (nvlist_empty(errors)) { + /* no hold-specific errors */ + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot hold")); + switch (ret) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } + + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s'"), nvpair_name(elem)); + switch (fnvpair_value_int32(elem)) { + case E2BIG: + /* + * Temporary tags wind up having the ds object id + * prepended. So even if we passed the length check + * above, it's still possible for the tag to wind + * up being slightly too long. 
+ */ + (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf); + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case EEXIST: + (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); + break; + default: + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); + } + } + + fnvlist_free(errors); + return (ret); +} + +static int +zfs_release_one(zfs_handle_t *zhp, void *arg) +{ + struct holdarg *ha = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + nvlist_t *existing_holds; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, ha->snapname); + + if (lzc_get_holds(name, &existing_holds) != 0) { + ha->error = ENOENT; + } else if (!nvlist_exists(existing_holds, ha->tag)) { + ha->error = ESRCH; + } else { + nvlist_t *torelease = fnvlist_alloc(); + fnvlist_add_boolean(torelease, ha->tag); + fnvlist_add_nvlist(ha->nvl, name, torelease); + fnvlist_free(torelease); + } + + if (ha->recursive) + rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); + zfs_close(zhp); + return (rv); +} + +int +zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, + boolean_t recursive) +{ + int ret; + struct holdarg ha; + nvlist_t *errors = NULL; + nvpair_t *elem; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + + ha.nvl = fnvlist_alloc(); + ha.snapname = snapname; + ha.tag = tag; + ha.recursive = recursive; + ha.error = 0; + (void) zfs_release_one(zfs_handle_dup(zhp), &ha); + + if (nvlist_empty(ha.nvl)) { + fnvlist_free(ha.nvl); + ret = ha.error; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot release hold from snapshot '%s@%s'"), + zhp->zfs_name, snapname); + if (ret == ESRCH) { + (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); + } else { + (void) zfs_standard_error(hdl, ret, errbuf); + } + return (ret); + } + + ret = lzc_release(ha.nvl, &errors); + fnvlist_free(ha.nvl); + + if (ret == 0) { + /* There may be errors even in the success case. 
*/ + fnvlist_free(errors); + return (0); + } + + if (nvlist_empty(errors)) { + /* no hold-specific errors */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot release")); + switch (errno) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + default: + (void) zfs_standard_error_fmt(hdl, errno, errbuf); + } + } + + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot release hold from snapshot '%s'"), + nvpair_name(elem)); + switch (fnvpair_value_int32(elem)) { + case ESRCH: + (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + default: + (void) zfs_standard_error_fmt(hdl, + fnvpair_value_int32(elem), errbuf); + } + } + + fnvlist_free(errors); + return (ret); +} + +int +zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + int nvsz = 2048; + void *nvbuf; + int err = 0; + char errbuf[1024]; + + assert(zhp->zfs_type == ZFS_TYPE_VOLUME || + zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + +tryagain: + + nvbuf = malloc(nvsz); + if (nvbuf == NULL) { + err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); + goto out; + } + + zc.zc_nvlist_dst_size = nvsz; + zc.zc_nvlist_dst = (uintptr_t)nvbuf; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), + zc.zc_name); + switch (errno) { + case ENOMEM: + free(nvbuf); + nvsz = zc.zc_nvlist_dst_size; + goto tryagain; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = 
zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } else { + /* success */ + int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); + if (rc) { + (void) snprintf(errbuf, sizeof (errbuf), dgettext( + TEXT_DOMAIN, "cannot get permissions on '%s'"), + zc.zc_name); + err = zfs_standard_error_fmt(hdl, rc, errbuf); + } + } + + free(nvbuf); +out: + return (err); +} + +int +zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char *nvbuf; + char errbuf[1024]; + size_t nvsz; + int err; + + assert(zhp->zfs_type == ZFS_TYPE_VOLUME || + zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + + err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); + assert(err == 0); + + nvbuf = malloc(nvsz); + + err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); + assert(err == 0); + + zc.zc_nvlist_src_size = nvsz; + zc.zc_nvlist_src = (uintptr_t)nvbuf; + zc.zc_perm_action = un; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), + zc.zc_name); + switch (errno) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } + + free(nvbuf); + + return (err); +} + +int +zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) +{ + int err; + char errbuf[1024]; + + err = lzc_get_holds(zhp->zfs_name, nvl); + + if (err != 0) { + libzfs_handle_t *hdl = zhp->zfs_hdl; + + (void) snprintf(errbuf, sizeof (errbuf), + 
dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), + zhp->zfs_name); + switch (err) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } + + return (err); +} + +/* + * Convert the zvol's volume size to an appropriate reservation. + * Note: If this routine is updated, it is necessary to update the ZFS test + * suite's shell version in reservation.kshlib. + */ +uint64_t +zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) +{ + uint64_t numdb; + uint64_t nblocks, volblocksize; + int ncopies; + char *strval; + + if (nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) + ncopies = atoi(strval); + else + ncopies = 1; + if (nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize) != 0) + volblocksize = ZVOL_DEFAULT_BLOCKSIZE; + nblocks = volsize/volblocksize; + /* start with metadnode L0-L6 */ + numdb = 7; + /* calculate number of indirects */ + while (nblocks > 1) { + nblocks += DNODES_PER_LEVEL - 1; + nblocks /= DNODES_PER_LEVEL; + numdb += nblocks; + } + numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); + volsize *= ncopies; + /* + * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't + * compressed, but in practice they compress down to about + * 1100 bytes + */ + numdb *= 1ULL << DN_MAX_INDBLKSHIFT; + volsize += numdb; + return (volsize); +} + +/* + * Attach/detach the given filesystem to/from the given jail. 
+ */ +int +zfs_jail(zfs_handle_t *zhp, int jailid, int attach) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_cmd_t zc = { 0 }; + char errbuf[1024]; + unsigned long cmd; + int ret; + + if (attach) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot jail '%s'"), zhp->zfs_name); + } else { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot unjail '%s'"), zhp->zfs_name); + } + + switch (zhp->zfs_type) { + case ZFS_TYPE_VOLUME: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "volumes can not be jailed")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_SNAPSHOT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots can not be jailed")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + } + assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_objset_type = DMU_OST_ZFS; + zc.zc_jailid = jailid; + + cmd = attach ? ZFS_IOC_JAIL : ZFS_IOC_UNJAIL; + if ((ret = ioctl(hdl->libzfs_fd, cmd, &zc)) != 0) + zfs_standard_error(hdl, errno, errbuf); + + return (ret); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c new file mode 100644 index 0000000..ab2007d --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c @@ -0,0 +1,834 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * zfs diff support + */ +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stddef.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <sys/zfs_ioctl.h> +#include <libzfs.h> +#include "libzfs_impl.h" + +#define ZDIFF_SNAPDIR "/.zfs/snapshot/" +#define ZDIFF_SHARESDIR "/.zfs/shares/" +#define ZDIFF_PREFIX "zfs-diff-%d" + +#define ZDIFF_ADDED '+' +#define ZDIFF_MODIFIED 'M' +#define ZDIFF_REMOVED '-' +#define ZDIFF_RENAMED 'R' + +static boolean_t +do_name_cmp(const char *fpath, const char *tpath) +{ + char *fname, *tname; + fname = strrchr(fpath, '/') + 1; + tname = strrchr(tpath, '/') + 1; + return (strcmp(fname, tname) == 0); +} + +typedef struct differ_info { + zfs_handle_t *zhp; + char *fromsnap; + char *frommnt; + char *tosnap; + char *tomnt; + char *ds; + char *dsmnt; + char *tmpsnap; + char errbuf[1024]; + boolean_t isclone; + boolean_t scripted; + boolean_t classify; + boolean_t timestamped; + uint64_t shares; + int zerr; + int cleanupfd; + int outputfd; + int datafd; +} differ_info_t; + +/* + * Given a {dsname, object id}, get the object path + */ +static int +get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, + char *pn, int maxlen, zfs_stat_t *sb) +{ + zfs_cmd_t zc = { 0 }; + int error; + + (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); + zc.zc_obj = obj; + + errno = 0; + error = ioctl(di->zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_STATS, &zc); + di->zerr = errno; + + /* we can get stats even if we failed to get a path */ + (void) memcpy(sb, 
&zc.zc_stat, sizeof (zfs_stat_t)); + if (error == 0) { + ASSERT(di->zerr == 0); + (void) strlcpy(pn, zc.zc_value, maxlen); + return (0); + } + + if (di->zerr == EPERM) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "The sys_config privilege or diff delegated permission " + "is needed\nto discover path names")); + return (-1); + } else { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Unable to determine path or stats for " + "object %lld in %s"), obj, dsname); + return (-1); + } +} + +/* + * stream_bytes + * + * Prints a file name out a character at a time. If the character is + * not in the range of what we consider "printable" ASCII, display it + * as an escaped 3-digit octal value. ASCII values less than a space + * are all control characters and we declare the upper end as the + * DELete character. This also is the last 7-bit ASCII character. + * We choose to treat all 8-bit ASCII as not printable for this + * application. 
/*
 * Write 'string' to 'fp' one byte at a time.
 *
 * Bytes strictly between space and DEL, other than a backslash, are
 * emitted as-is; every other byte (space, control characters, backslash,
 * DEL and anything outside 7-bit ASCII) is written as a backslash
 * followed by its 3-digit octal value.
 */
static void
stream_bytes(FILE *fp, const char *string)
{
	const char *cp;

	for (cp = string; *cp != '\0'; cp++) {
		char ch = *cp;
		int verbatim = (ch > ' ' && ch != '\\' && ch < '\177');

		if (verbatim) {
			(void) fprintf(fp, "%c", ch);
			continue;
		}
		(void) fprintf(fp, "\\%03hho", (unsigned char)ch);
	}
}
fprintf(fp, "\n"); +} + +static void +print_file(FILE *fp, differ_info_t *di, char type, const char *file, + zfs_stat_t *isb) +{ + if (di->timestamped) + (void) fprintf(fp, "%10lld.%09lld\t", + (longlong_t)isb->zs_ctime[0], + (longlong_t)isb->zs_ctime[1]); + (void) fprintf(fp, "%c\t", type); + if (di->classify) { + print_what(fp, isb->zs_mode); + (void) fprintf(fp, "\t"); + } + print_cmn(fp, di, file); + (void) fprintf(fp, "\n"); +} + +static int +write_inuse_diffs_one(FILE *fp, differ_info_t *di, uint64_t dobj) +{ + struct zfs_stat fsb, tsb; + boolean_t same_name; + mode_t fmode, tmode; + char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN]; + int fobjerr, tobjerr; + int change; + + if (dobj == di->shares) + return (0); + + /* + * Check the from and to snapshots for info on the object. If + * we get ENOENT, then the object just didn't exist in that + * snapshot. If we get ENOTSUP, then we tried to get + * info on a non-ZPL object, which we don't care about anyway. + */ + fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname, + MAXPATHLEN, &fsb); + if (fobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) + return (-1); + + tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname, + MAXPATHLEN, &tsb); + if (tobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) + return (-1); + + /* + * Unallocated object sharing the same meta dnode block + */ + if (fobjerr && tobjerr) { + ASSERT(di->zerr == ENOENT || di->zerr == ENOTSUP); + di->zerr = 0; + return (0); + } + + di->zerr = 0; /* negate get_stats_for_obj() from side that failed */ + fmode = fsb.zs_mode & S_IFMT; + tmode = tsb.zs_mode & S_IFMT; + if (fmode == S_IFDIR || tmode == S_IFDIR || fsb.zs_links == 0 || + tsb.zs_links == 0) + change = 0; + else + change = tsb.zs_links - fsb.zs_links; + + if (fobjerr) { + if (change) { + print_link_change(fp, di, change, tobjname, &tsb); + return (0); + } + print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); + return (0); + } else if (tobjerr) { + if (change) { + 
print_link_change(fp, di, change, fobjname, &fsb); + return (0); + } + print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); + return (0); + } + + if (fmode != tmode && fsb.zs_gen == tsb.zs_gen) + tsb.zs_gen++; /* Force a generational difference */ + same_name = do_name_cmp(fobjname, tobjname); + + /* Simple modification or no change */ + if (fsb.zs_gen == tsb.zs_gen) { + /* No apparent changes. Could we assert !this? */ + if (fsb.zs_ctime[0] == tsb.zs_ctime[0] && + fsb.zs_ctime[1] == tsb.zs_ctime[1]) + return (0); + if (change) { + print_link_change(fp, di, change, + change > 0 ? fobjname : tobjname, &tsb); + } else if (same_name) { + print_file(fp, di, ZDIFF_MODIFIED, fobjname, &tsb); + } else { + print_rename(fp, di, fobjname, tobjname, &tsb); + } + return (0); + } else { + /* file re-created or object re-used */ + print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); + print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); + return (0); + } +} + +static int +write_inuse_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) +{ + uint64_t o; + int err; + + for (o = dr->ddr_first; o <= dr->ddr_last; o++) { + if (err = write_inuse_diffs_one(fp, di, o)) + return (err); + } + return (0); +} + +static int +describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf, + int maxlen) +{ + struct zfs_stat sb; + + if (get_stats_for_obj(di, di->fromsnap, object, namebuf, + maxlen, &sb) != 0) { + /* Let it slide, if in the delete queue on from side */ + if (di->zerr == ENOENT && sb.zs_links == 0) { + di->zerr = 0; + return (0); + } + return (-1); + } + + print_file(fp, di, ZDIFF_REMOVED, namebuf, &sb); + return (0); +} + +static int +write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *lhdl = di->zhp->zfs_hdl; + char fobjname[MAXPATHLEN]; + + (void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name)); + zc.zc_obj = dr->ddr_first - 1; + + ASSERT(di->zerr == 0); + + while (zc.zc_obj < dr->ddr_last) { + int 
err; + + err = ioctl(lhdl->libzfs_fd, ZFS_IOC_NEXT_OBJ, &zc); + if (err == 0) { + if (zc.zc_obj == di->shares) { + zc.zc_obj++; + continue; + } + if (zc.zc_obj > dr->ddr_last) { + break; + } + err = describe_free(fp, di, zc.zc_obj, fobjname, + MAXPATHLEN); + if (err) + break; + } else if (errno == ESRCH) { + break; + } else { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "next allocated object (> %lld) find failure"), + zc.zc_obj); + di->zerr = errno; + break; + } + } + if (di->zerr) + return (-1); + return (0); +} + +static void * +differ(void *arg) +{ + differ_info_t *di = arg; + dmu_diff_record_t dr; + FILE *ofp; + int err = 0; + + if ((ofp = fdopen(di->outputfd, "w")) == NULL) { + di->zerr = errno; + (void) strerror_r(errno, di->errbuf, sizeof (di->errbuf)); + (void) close(di->datafd); + return ((void *)-1); + } + + for (;;) { + char *cp = (char *)&dr; + int len = sizeof (dr); + int rv; + + do { + rv = read(di->datafd, cp, len); + cp += rv; + len -= rv; + } while (len > 0 && rv > 0); + + if (rv < 0 || (rv == 0 && len != sizeof (dr))) { + di->zerr = EPIPE; + break; + } else if (rv == 0) { + /* end of file at a natural breaking point */ + break; + } + + switch (dr.ddr_type) { + case DDR_FREE: + err = write_free_diffs(ofp, di, &dr); + break; + case DDR_INUSE: + err = write_inuse_diffs(ofp, di, &dr); + break; + default: + di->zerr = EPIPE; + break; + } + + if (err || di->zerr) + break; + } + + (void) fclose(ofp); + (void) close(di->datafd); + if (err) + return ((void *)-1); + if (di->zerr) { + ASSERT(di->zerr == EINVAL); + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Internal error: bad data from diff IOCTL")); + return ((void *)-1); + } + return ((void *)0); +} + +static int +find_shares_object(differ_info_t *di) +{ + char fullpath[MAXPATHLEN]; + struct stat64 sb = { 0 }; + + (void) strlcpy(fullpath, di->dsmnt, MAXPATHLEN); + (void) strlcat(fullpath, ZDIFF_SHARESDIR, MAXPATHLEN); + + if 
(stat64(fullpath, &sb) != 0) { +#ifdef sun + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath); + return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf)); +#else + return (0); +#endif + } + + di->shares = (uint64_t)sb.st_ino; + return (0); +} + +static int +make_temp_snapshot(differ_info_t *di) +{ + libzfs_handle_t *hdl = di->zhp->zfs_hdl; + zfs_cmd_t zc = { 0 }; + + (void) snprintf(zc.zc_value, sizeof (zc.zc_value), + ZDIFF_PREFIX, getpid()); + (void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name)); + zc.zc_cleanup_fd = di->cleanupfd; + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) { + int err = errno; + if (err == EPERM) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, "The diff delegated " + "permission is needed in order\nto create a " + "just-in-time snapshot for diffing\n")); + return (zfs_error(hdl, EZFS_DIFF, di->errbuf)); + } else { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, "Cannot create just-in-time " + "snapshot of '%s'"), zc.zc_name); + return (zfs_standard_error(hdl, err, di->errbuf)); + } + } + + di->tmpsnap = zfs_strdup(hdl, zc.zc_value); + di->tosnap = zfs_asprintf(hdl, "%s@%s", di->ds, di->tmpsnap); + return (0); +} + +static void +teardown_differ_info(differ_info_t *di) +{ + free(di->ds); + free(di->dsmnt); + free(di->fromsnap); + free(di->frommnt); + free(di->tosnap); + free(di->tmpsnap); + free(di->tomnt); + (void) close(di->cleanupfd); +} + +static int +get_snapshot_names(differ_info_t *di, const char *fromsnap, + const char *tosnap) +{ + libzfs_handle_t *hdl = di->zhp->zfs_hdl; + char *atptrf = NULL; + char *atptrt = NULL; + int fdslen, fsnlen; + int tdslen, tsnlen; + + /* + * Can accept + * dataset@snap1 + * dataset@snap1 dataset@snap2 + * dataset@snap1 @snap2 + * dataset@snap1 dataset + * @snap1 dataset@snap2 + */ + if (tosnap == NULL) { + /* only a from snapshot given, must be valid */ + (void) 
snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Badly formed snapshot name %s"), fromsnap); + + if (!zfs_validate_name(hdl, fromsnap, ZFS_TYPE_SNAPSHOT, + B_FALSE)) { + return (zfs_error(hdl, EZFS_INVALIDNAME, + di->errbuf)); + } + + atptrf = strchr(fromsnap, '@'); + ASSERT(atptrf != NULL); + fdslen = atptrf - fromsnap; + + di->fromsnap = zfs_strdup(hdl, fromsnap); + di->ds = zfs_strdup(hdl, fromsnap); + di->ds[fdslen] = '\0'; + + /* the to snap will be a just-in-time snap of the head */ + return (make_temp_snapshot(di)); + } + + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Unable to determine which snapshots to compare")); + + atptrf = strchr(fromsnap, '@'); + atptrt = strchr(tosnap, '@'); + fdslen = atptrf ? atptrf - fromsnap : strlen(fromsnap); + tdslen = atptrt ? atptrt - tosnap : strlen(tosnap); + fsnlen = strlen(fromsnap) - fdslen; /* includes @ sign */ + tsnlen = strlen(tosnap) - tdslen; /* includes @ sign */ + + if (fsnlen <= 1 || tsnlen == 1 || (fdslen == 0 && tdslen == 0) || + (fsnlen == 0 && tsnlen == 0)) { + return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); + } else if ((fdslen > 0 && tdslen > 0) && + ((tdslen != fdslen || strncmp(fromsnap, tosnap, fdslen) != 0))) { + /* + * not the same dataset name, might be okay if + * tosnap is a clone of a fromsnap descendant. 
+ */ + char origin[ZFS_MAXNAMELEN]; + zprop_source_t src; + zfs_handle_t *zhp; + + di->ds = zfs_alloc(di->zhp->zfs_hdl, tdslen + 1); + (void) strncpy(di->ds, tosnap, tdslen); + di->ds[tdslen] = '\0'; + + zhp = zfs_open(hdl, di->ds, ZFS_TYPE_FILESYSTEM); + while (zhp != NULL) { + (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, + origin, sizeof (origin), &src, NULL, 0, B_FALSE); + + if (strncmp(origin, fromsnap, fsnlen) == 0) + break; + + (void) zfs_close(zhp); + zhp = zfs_open(hdl, origin, ZFS_TYPE_FILESYSTEM); + } + + if (zhp == NULL) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); + } else { + (void) zfs_close(zhp); + } + + di->isclone = B_TRUE; + di->fromsnap = zfs_strdup(hdl, fromsnap); + if (tsnlen) { + di->tosnap = zfs_strdup(hdl, tosnap); + } else { + return (make_temp_snapshot(di)); + } + } else { + int dslen = fdslen ? fdslen : tdslen; + + di->ds = zfs_alloc(hdl, dslen + 1); + (void) strncpy(di->ds, fdslen ? 
fromsnap : tosnap, dslen); + di->ds[dslen] = '\0'; + + di->fromsnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrf); + if (tsnlen) { + di->tosnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrt); + } else { + return (make_temp_snapshot(di)); + } + } + return (0); +} + +static int +get_mountpoint(differ_info_t *di, char *dsnm, char **mntpt) +{ + boolean_t mounted; + + mounted = is_mounted(di->zhp->zfs_hdl, dsnm, mntpt); + if (mounted == B_FALSE) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Cannot diff an unmounted snapshot")); + return (zfs_error(di->zhp->zfs_hdl, EZFS_BADTYPE, di->errbuf)); + } + + /* Avoid a double slash at the beginning of root-mounted datasets */ + if (**mntpt == '/' && *(*mntpt + 1) == '\0') + **mntpt = '\0'; + return (0); +} + +static int +get_mountpoints(differ_info_t *di) +{ + char *strptr; + char *frommntpt; + + /* + * first get the mountpoint for the parent dataset + */ + if (get_mountpoint(di, di->ds, &di->dsmnt) != 0) + return (-1); + + strptr = strchr(di->tosnap, '@'); + ASSERT3P(strptr, !=, NULL); + di->tomnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", di->dsmnt, + ZDIFF_SNAPDIR, ++strptr); + + strptr = strchr(di->fromsnap, '@'); + ASSERT3P(strptr, !=, NULL); + + frommntpt = di->dsmnt; + if (di->isclone) { + char *mntpt; + int err; + + *strptr = '\0'; + err = get_mountpoint(di, di->fromsnap, &mntpt); + *strptr = '@'; + if (err != 0) + return (-1); + frommntpt = mntpt; + } + + di->frommnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", frommntpt, + ZDIFF_SNAPDIR, ++strptr); + + if (di->isclone) + free(frommntpt); + + return (0); +} + +static int +setup_differ_info(zfs_handle_t *zhp, const char *fromsnap, + const char *tosnap, differ_info_t *di) +{ + di->zhp = zhp; + + di->cleanupfd = open(ZFS_DEV, O_RDWR|O_EXCL); + VERIFY(di->cleanupfd >= 0); + + if (get_snapshot_names(di, fromsnap, tosnap) != 0) + return (-1); + + if (get_mountpoints(di) != 0) + return (-1); + + if (find_shares_object(di) != 0) + return (-1); + + 
return (0); +} + +int +zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, + const char *tosnap, int flags) +{ + zfs_cmd_t zc = { 0 }; + char errbuf[1024]; + differ_info_t di = { 0 }; + pthread_t tid; + int pipefd[2]; + int iocerr; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "zfs diff failed")); + + if (setup_differ_info(zhp, fromsnap, tosnap, &di)) { + teardown_differ_info(&di); + return (-1); + } + + if (pipe(pipefd)) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + teardown_differ_info(&di); + return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); + } + + di.scripted = (flags & ZFS_DIFF_PARSEABLE); + di.classify = (flags & ZFS_DIFF_CLASSIFY); + di.timestamped = (flags & ZFS_DIFF_TIMESTAMP); + + di.outputfd = outfd; + di.datafd = pipefd[0]; + + if (pthread_create(&tid, NULL, differ, &di)) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + (void) close(pipefd[0]); + (void) close(pipefd[1]); + teardown_differ_info(&di); + return (zfs_error(zhp->zfs_hdl, + EZFS_THREADCREATEFAILED, errbuf)); + } + + /* do the ioctl() */ + (void) strlcpy(zc.zc_value, di.fromsnap, strlen(di.fromsnap) + 1); + (void) strlcpy(zc.zc_name, di.tosnap, strlen(di.tosnap) + 1); + zc.zc_cookie = pipefd[1]; + + iocerr = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DIFF, &zc); + if (iocerr != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Unable to obtain diffs")); + if (errno == EPERM) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "\n The sys_mount privilege or diff delegated " + "permission is needed\n to execute the " + "diff ioctl")); + } else if (errno == EXDEV) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "\n Not an earlier snapshot from the same fs")); + } else if (errno != EPIPE || di.zerr == 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + } + (void) close(pipefd[1]); + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + teardown_differ_info(&di); + if (di.zerr != 0 && di.zerr != 
EPIPE) { + zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); + return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); + } else { + return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf)); + } + } + + (void) close(pipefd[1]); + (void) pthread_join(tid, NULL); + + if (di.zerr != 0) { + zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); + return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); + } + teardown_differ_info(&di); + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c new file mode 100644 index 0000000..788fa2c --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c @@ -0,0 +1,452 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <dlfcn.h> +#include <errno.h> +#include <libintl.h> +#include <link.h> +#include <pthread.h> +#include <strings.h> +#include <unistd.h> + +#include <libzfs.h> + +#include <fm/libtopo.h> +#include <sys/fm/protocol.h> +#include <sys/systeminfo.h> + +#include "libzfs_impl.h" + +/* + * This file is responsible for determining the relationship between I/O + * devices paths and physical locations. In the world of MPxIO and external + * enclosures, the device path is not synonymous with the physical location. + * If you remove a drive and insert it into a different slot, it will end up + * with the same path under MPxIO. If you recable storage enclosures, the + * device paths may change. All of this makes it difficult to implement the + * 'autoreplace' property, which is supposed to automatically manage disk + * replacement based on physical slot. + * + * In order to work around these limitations, we have a per-vdev FRU property + * that is the libtopo path (minus disk-specific authority information) to the + * physical location of the device on the system. This is an optional + * property, and is only needed when using the 'autoreplace' property or when + * generating FMA faults against vdevs. + */ + +/* + * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case + * it is not present. We only need this once per library instance, so it is + * not part of the libzfs handle. 
+ */ +static void *_topo_dlhandle; +static topo_hdl_t *(*_topo_open)(int, const char *, int *); +static void (*_topo_close)(topo_hdl_t *); +static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *); +static void (*_topo_snap_release)(topo_hdl_t *); +static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *, + topo_walk_cb_t, void *, int *); +static int (*_topo_walk_step)(topo_walk_t *, int); +static void (*_topo_walk_fini)(topo_walk_t *); +static void (*_topo_hdl_strfree)(topo_hdl_t *, char *); +static char *(*_topo_node_name)(tnode_t *); +static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *, + char **, int *); +static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *); +static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *); +static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *, + const char *); + +#define ZFS_FRU_HASH_SIZE 257 + +static size_t +fru_strhash(const char *key) +{ + ulong_t g, h = 0; + const char *p; + + for (p = key; *p != '\0'; p++) { + h = (h << 4) + *p; + + if ((g = (h & 0xf0000000)) != 0) { + h ^= (g >> 24); + h ^= g; + } + } + + return (h % ZFS_FRU_HASH_SIZE); +} + +static int +libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg) +{ + libzfs_handle_t *hdl = arg; + nvlist_t *fru; + char *devpath, *frustr; + int err; + libzfs_fru_t *frup; + size_t idx; + + /* + * If this is the chassis node, and we don't yet have the system + * chassis ID, then fill in this value now. + */ + if (hdl->libzfs_chassis_id[0] == '\0' && + strcmp(_topo_node_name(tn), "chassis") == 0) { + if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY, + FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0) + (void) strlcpy(hdl->libzfs_chassis_id, devpath, + sizeof (hdl->libzfs_chassis_id)); + } + + /* + * Skip non-disk nodes. + */ + if (strcmp(_topo_node_name(tn), "disk") != 0) + return (TOPO_WALK_NEXT); + + /* + * Get the devfs path and FRU. 
+ */ + if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0) + return (TOPO_WALK_NEXT); + + if (libzfs_fru_lookup(hdl, devpath) != NULL) { + _topo_hdl_strfree(thp, devpath); + return (TOPO_WALK_NEXT); + } + + if (_topo_node_fru(tn, &fru, NULL, &err) != 0) { + _topo_hdl_strfree(thp, devpath); + return (TOPO_WALK_NEXT); + } + + /* + * Convert the FRU into a string. + */ + if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) { + nvlist_free(fru); + _topo_hdl_strfree(thp, devpath); + return (TOPO_WALK_NEXT); + } + + nvlist_free(fru); + + /* + * Finally, we have a FRU string and device path. Add it to the hash. + */ + if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) { + _topo_hdl_strfree(thp, devpath); + _topo_hdl_strfree(thp, frustr); + return (TOPO_WALK_NEXT); + } + + if ((frup->zf_device = strdup(devpath)) == NULL || + (frup->zf_fru = strdup(frustr)) == NULL) { + free(frup->zf_device); + free(frup); + _topo_hdl_strfree(thp, devpath); + _topo_hdl_strfree(thp, frustr); + return (TOPO_WALK_NEXT); + } + + _topo_hdl_strfree(thp, devpath); + _topo_hdl_strfree(thp, frustr); + + idx = fru_strhash(frup->zf_device); + frup->zf_chain = hdl->libzfs_fru_hash[idx]; + hdl->libzfs_fru_hash[idx] = frup; + frup->zf_next = hdl->libzfs_fru_list; + hdl->libzfs_fru_list = frup; + + return (TOPO_WALK_NEXT); +} + +/* + * Called during initialization to setup the dynamic libtopo connection. 
+ */ +#pragma init(libzfs_init_fru) +static void +libzfs_init_fru(void) +{ + char path[MAXPATHLEN]; + char isa[257]; + +#if defined(_LP64) + if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0) + isa[0] = '\0'; +#else + isa[0] = '\0'; +#endif + (void) snprintf(path, sizeof (path), + "/usr/lib/fm/%s/libtopo.so", isa); + + if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL) + return; + + _topo_open = (topo_hdl_t *(*)()) + dlsym(_topo_dlhandle, "topo_open"); + _topo_close = (void (*)()) + dlsym(_topo_dlhandle, "topo_close"); + _topo_snap_hold = (char *(*)()) + dlsym(_topo_dlhandle, "topo_snap_hold"); + _topo_snap_release = (void (*)()) + dlsym(_topo_dlhandle, "topo_snap_release"); + _topo_walk_init = (topo_walk_t *(*)()) + dlsym(_topo_dlhandle, "topo_walk_init"); + _topo_walk_step = (int (*)()) + dlsym(_topo_dlhandle, "topo_walk_step"); + _topo_walk_fini = (void (*)()) + dlsym(_topo_dlhandle, "topo_walk_fini"); + _topo_hdl_strfree = (void (*)()) + dlsym(_topo_dlhandle, "topo_hdl_strfree"); + _topo_node_name = (char *(*)()) + dlsym(_topo_dlhandle, "topo_node_name"); + _topo_prop_get_string = (int (*)()) + dlsym(_topo_dlhandle, "topo_prop_get_string"); + _topo_node_fru = (int (*)()) + dlsym(_topo_dlhandle, "topo_node_fru"); + _topo_fmri_nvl2str = (int (*)()) + dlsym(_topo_dlhandle, "topo_fmri_nvl2str"); + _topo_fmri_strcmp_noauth = (int (*)()) + dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth"); + + if (_topo_open == NULL || _topo_close == NULL || + _topo_snap_hold == NULL || _topo_snap_release == NULL || + _topo_walk_init == NULL || _topo_walk_step == NULL || + _topo_walk_fini == NULL || _topo_hdl_strfree == NULL || + _topo_node_name == NULL || _topo_prop_get_string == NULL || + _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL || + _topo_fmri_strcmp_noauth == NULL) { + (void) dlclose(_topo_dlhandle); + _topo_dlhandle = NULL; + } +} + +/* + * Refresh the mappings from device path -> FMRI. 
We do this by walking the + * hc topology looking for disk nodes, and recording the io/devfs-path and FRU. + * Note that we strip out the disk-specific authority information (serial, + * part, revision, etc) so that we are left with only the identifying + * characteristics of the slot (hc path and chassis-id). + */ +void +libzfs_fru_refresh(libzfs_handle_t *hdl) +{ + int err; + char *uuid; + topo_hdl_t *thp; + topo_walk_t *twp; + + if (_topo_dlhandle == NULL) + return; + + /* + * Clear the FRU hash and initialize our basic structures. + */ + libzfs_fru_clear(hdl, B_FALSE); + + if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION, + NULL, &err)) == NULL) + return; + + thp = hdl->libzfs_topo_hdl; + + if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL) + return; + + _topo_hdl_strfree(thp, uuid); + + if (hdl->libzfs_fru_hash == NULL && + (hdl->libzfs_fru_hash = + calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL) + return; + + /* + * We now have a topo snapshot, so iterate over the hc topology looking + * for disks to add to the hash. + */ + twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC, + libzfs_fru_gather, hdl, &err); + if (twp != NULL) { + (void) _topo_walk_step(twp, TOPO_WALK_CHILD); + _topo_walk_fini(twp); + } +} + +/* + * Given a devfs path, return the FRU for the device, if known. This will + * automatically call libzfs_fru_refresh() if it hasn't already been called by + * the consumer. The string returned is valid until the next call to + * libzfs_fru_refresh(). + */ +const char * +libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath) +{ + size_t idx = fru_strhash(devpath); + libzfs_fru_t *frup; + + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_fru_hash == NULL) + return (NULL); + + for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; + frup = frup->zf_chain) { + if (strcmp(devpath, frup->zf_device) == 0) + return (frup->zf_fru); + } + + return (NULL); +} + +/* + * Given a fru path, return the device path. 
This will automatically call + * libzfs_fru_refresh() if it hasn't already been called by the consumer. The + * string returned is valid until the next call to libzfs_fru_refresh(). + */ +const char * +libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) +{ + libzfs_fru_t *frup; + size_t idx; + + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_fru_hash == NULL) + return (NULL); + + for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) { + for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; + frup = frup->zf_next) { + if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, + fru, frup->zf_fru)) + return (frup->zf_device); + } + } + + return (NULL); +} + +/* + * Change the stored FRU for the given vdev. + */ +int +zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) +{ + zfs_cmd_t zc = { 0 }; + + (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); + zc.zc_guid = vdev_guid; + + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0) + return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot set FRU"))); + + return (0); +} + +/* + * Compare to two FRUs, ignoring any authority information. + */ +boolean_t +libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b) +{ + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_fru_hash == NULL) + return (strcmp(a, b) == 0); + + return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b)); +} + +/* + * This special function checks to see whether the FRU indicates it's supposed + * to be in the system chassis, but the chassis-id doesn't match. This can + * happen in a clustered case, where both head nodes have the same logical + * disk, but opening the device on the other head node is meaningless. 
+ */ +boolean_t +libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru) +{ + const char *chassisid; + size_t len; + + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_chassis_id[0] == '\0') + return (B_FALSE); + + if (strstr(fru, "/chassis=0/") == NULL) + return (B_FALSE); + + if ((chassisid = strstr(fru, ":chassis-id=")) == NULL) + return (B_FALSE); + + chassisid += 12; + len = strlen(hdl->libzfs_chassis_id); + if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 && + (chassisid[len] == '/' || chassisid[len] == ':')) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Clear memory associated with the FRU hash. + */ +void +libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final) +{ + libzfs_fru_t *frup; + + while ((frup = hdl->libzfs_fru_list) != NULL) { + hdl->libzfs_fru_list = frup->zf_next; + free(frup->zf_device); + free(frup->zf_fru); + free(frup); + } + + hdl->libzfs_fru_list = NULL; + + if (hdl->libzfs_topo_hdl != NULL) { + _topo_snap_release(hdl->libzfs_topo_hdl); + _topo_close(hdl->libzfs_topo_hdl); + hdl->libzfs_topo_hdl = NULL; + } + + if (final) { + free(hdl->libzfs_fru_hash); + } else if (hdl->libzfs_fru_hash != NULL) { + bzero(hdl->libzfs_fru_hash, + ZFS_FRU_HASH_SIZE * sizeof (void *)); + } +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h new file mode 100644 index 0000000..481ae52 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h @@ -0,0 +1,223 @@ +/* + * CDDL HEADER SART + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. + * All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + */ + +#ifndef _LIBZFS_IMPL_H +#define _LIBZFS_IMPL_H + +#include <sys/dmu.h> +#include <sys/fs/zfs.h> +#include <sys/zfs_ioctl.h> +#include <sys/spa.h> +#include <sys/nvpair.h> + +#include <libshare.h> +#include <libuutil.h> +#include <libzfs.h> +#include <libzfs_core.h> +#include <libzfs_compat.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef VERIFY +#undef VERIFY +#endif +#define VERIFY verify + +typedef struct libzfs_fru { + char *zf_device; + char *zf_fru; + struct libzfs_fru *zf_chain; + struct libzfs_fru *zf_next; +} libzfs_fru_t; + +struct libzfs_handle { + int libzfs_error; + int libzfs_fd; + FILE *libzfs_mnttab; + FILE *libzfs_sharetab; + zpool_handle_t *libzfs_pool_handles; + uu_avl_pool_t *libzfs_ns_avlpool; + uu_avl_t *libzfs_ns_avl; + uint64_t libzfs_ns_gen; + int libzfs_desc_active; + char libzfs_action[1024]; + char libzfs_desc[1024]; + int libzfs_printerr; + int libzfs_storeerr; /* stuff error messages into buffer */ + void *libzfs_sharehdl; /* libshare handle */ + uint_t libzfs_shareflags; + boolean_t libzfs_mnttab_enable; + avl_tree_t libzfs_mnttab_cache; + int libzfs_pool_iter; + libzfs_fru_t **libzfs_fru_hash; + libzfs_fru_t *libzfs_fru_list; + char libzfs_chassis_id[256]; +}; + +#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ + +struct 
zfs_handle { + libzfs_handle_t *zfs_hdl; + zpool_handle_t *zpool_hdl; + char zfs_name[ZFS_MAXNAMELEN]; + zfs_type_t zfs_type; /* type including snapshot */ + zfs_type_t zfs_head_type; /* type excluding snapshot */ + dmu_objset_stats_t zfs_dmustats; + nvlist_t *zfs_props; + nvlist_t *zfs_user_props; + nvlist_t *zfs_recvd_props; + boolean_t zfs_mntcheck; + char *zfs_mntopts; + uint8_t *zfs_props_table; +}; + +/* + * This is different from checking zfs_type, because it will also catch + * snapshots of volumes. + */ +#define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME) + +struct zpool_handle { + libzfs_handle_t *zpool_hdl; + zpool_handle_t *zpool_next; + char zpool_name[ZPOOL_MAXNAMELEN]; + int zpool_state; + size_t zpool_config_size; + nvlist_t *zpool_config; + nvlist_t *zpool_old_config; + nvlist_t *zpool_props; + diskaddr_t zpool_start_block; +}; + +typedef enum { + PROTO_NFS = 0, + PROTO_SMB = 1, + PROTO_END = 2 +} zfs_share_proto_t; + +/* + * The following can be used as a bitmask and any new values + * added must preserve that capability. 
+ */ +typedef enum { + SHARED_NOT_SHARED = 0x0, + SHARED_NFS = 0x2, + SHARED_SMB = 0x4 +} zfs_share_type_t; + +int zfs_error(libzfs_handle_t *, int, const char *); +int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...); +void zfs_error_aux(libzfs_handle_t *, const char *, ...); +void *zfs_alloc(libzfs_handle_t *, size_t); +void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t); +char *zfs_asprintf(libzfs_handle_t *, const char *, ...); +char *zfs_strdup(libzfs_handle_t *, const char *); +int no_memory(libzfs_handle_t *); + +int zfs_standard_error(libzfs_handle_t *, int, const char *); +int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); +int zpool_standard_error(libzfs_handle_t *, int, const char *); +int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); + +int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***, + size_t *); +zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *); +zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *); + +int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t, + nvlist_t *, char **, uint64_t *, const char *); +int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, + zfs_type_t type); + +/* + * Use this changelist_gather() flag to force attempting mounts + * on each change node regardless of whether or not it is currently + * mounted. + */ +#define CL_GATHER_MOUNT_ALWAYS 0x01 +/* + * Use this changelist_gather() flag to prevent unmounting of file systems. 
+ */ +#define CL_GATHER_DONT_UNMOUNT 0x02 + +typedef struct prop_changelist prop_changelist_t; + +int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t); +int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); +int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); +int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *); +int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **); +void zcmd_free_nvlists(zfs_cmd_t *); + +int changelist_prefix(prop_changelist_t *); +int changelist_postfix(prop_changelist_t *); +void changelist_rename(prop_changelist_t *, const char *, const char *); +void changelist_remove(prop_changelist_t *, const char *); +void changelist_free(prop_changelist_t *); +prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int); +int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *); +int changelist_haszonedchild(prop_changelist_t *); + +void remove_mountpoint(zfs_handle_t *); +int create_parents(libzfs_handle_t *, char *, int); +boolean_t isa_child_of(const char *dataset, const char *parent); + +zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); +zfs_handle_t *make_bookmark_handle(zfs_handle_t *, const char *, + nvlist_t *props); + +int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); + +boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); + +int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, + boolean_t modifying); + +void namespace_clear(libzfs_handle_t *); + +/* + * libshare (sharemgr) interfaces used internally. 
+ */ + +extern int zfs_init_libshare(libzfs_handle_t *, int); +extern void zfs_uninit_libshare(libzfs_handle_t *); +extern int zfs_parse_options(char *, zfs_share_proto_t); + +extern int zfs_unshare_proto(zfs_handle_t *, + const char *, zfs_share_proto_t *); + +extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFS_IMPL_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c new file mode 100644 index 0000000..49a4385 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c @@ -0,0 +1,1741 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Pool import support functions. + * + * To import a pool, we rely on reading the configuration information from the + * ZFS label of each device. 
If we successfully read the label, then we + * organize the configuration information in the following hierarchy: + * + * pool guid -> toplevel vdev guid -> label txg + * + * Duplicate entries matching this same tuple will be discarded. Once we have + * examined every device, we pick the best label txg config for each toplevel + * vdev. We then arrange these toplevel vdevs into a complete pool config, and + * update any paths that have changed. Finally, we attempt to import the pool + * using our derived config, and record the results. + */ + +#include <ctype.h> +#include <devid.h> +#include <dirent.h> +#include <errno.h> +#include <libintl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <thread_pool.h> +#include <libgeom.h> + +#include <sys/vdev_impl.h> + +#include "libzfs.h" +#include "libzfs_impl.h" + +/* + * Intermediate structures used to gather configuration information. + */ +typedef struct config_entry { + uint64_t ce_txg; + nvlist_t *ce_config; + struct config_entry *ce_next; +} config_entry_t; + +typedef struct vdev_entry { + uint64_t ve_guid; + config_entry_t *ve_configs; + struct vdev_entry *ve_next; +} vdev_entry_t; + +typedef struct pool_entry { + uint64_t pe_guid; + vdev_entry_t *pe_vdevs; + struct pool_entry *pe_next; +} pool_entry_t; + +typedef struct name_entry { + char *ne_name; + uint64_t ne_guid; + struct name_entry *ne_next; +} name_entry_t; + +typedef struct pool_list { + pool_entry_t *pools; + name_entry_t *names; +} pool_list_t; + +static char * +get_devid(const char *path) +{ +#ifdef have_devid + int fd; + ddi_devid_t devid; + char *minor, *ret; + + if ((fd = open(path, O_RDONLY)) < 0) + return (NULL); + + minor = NULL; + ret = NULL; + if (devid_get(fd, &devid) == 0) { + if (devid_get_minor_name(fd, &minor) == 0) + ret = devid_str_encode(devid, minor); + if (minor != NULL) + devid_str_free(minor); + devid_free(devid); + } + (void) close(fd); + + 
return (ret); +#else + return (NULL); +#endif +} + + +/* + * Go through and fix up any path and/or devid information for the given vdev + * configuration. + */ +static int +fix_paths(nvlist_t *nv, name_entry_t *names) +{ + nvlist_t **child; + uint_t c, children; + uint64_t guid; + name_entry_t *ne, *best; + char *path, *devid; + int matched; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (fix_paths(child[c], names) != 0) + return (-1); + return (0); + } + + /* + * This is a leaf (file or disk) vdev. In either case, go through + * the name list and see if we find a matching guid. If so, replace + * the path and see if we can calculate a new devid. + * + * There may be multiple names associated with a particular guid, in + * which case we have overlapping slices or multiple paths to the same + * disk. If this is the case, then we want to pick the path that is + * the most similar to the original, where "most similar" is the number + * of matching characters starting from the end of the path. This will + * preserve slice numbers even if the disks have been reorganized, and + * will also catch preferred disk names if multiple paths exist. + */ + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) + path = NULL; + + matched = 0; + best = NULL; + for (ne = names; ne != NULL; ne = ne->ne_next) { + if (ne->ne_guid == guid) { + const char *src, *dst; + int count; + + if (path == NULL) { + best = ne; + break; + } + + src = ne->ne_name + strlen(ne->ne_name) - 1; + dst = path + strlen(path) - 1; + for (count = 0; src >= ne->ne_name && dst >= path; + src--, dst--, count++) + if (*src != *dst) + break; + + /* + * At this point, 'count' is the number of characters + * matched from the end. 
+ */ + if (count > matched || best == NULL) { + best = ne; + matched = count; + } + } + } + + if (best == NULL) + return (0); + + if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) + return (-1); + + if ((devid = get_devid(best->ne_name)) == NULL) { + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); + } else { + if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) + return (-1); + devid_str_free(devid); + } + + return (0); +} + +/* + * Add the given configuration to the list of known devices. + */ +static int +add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, + nvlist_t *config) +{ + uint64_t pool_guid, vdev_guid, top_guid, txg, state; + pool_entry_t *pe; + vdev_entry_t *ve; + config_entry_t *ce; + name_entry_t *ne; + + /* + * If this is a hot spare not currently in use or level 2 cache + * device, add it to the list of names to translate, but don't do + * anything else. + */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &state) == 0 && + (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && + nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { + if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) + return (-1); + + if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { + free(ne); + return (-1); + } + ne->ne_guid = vdev_guid; + ne->ne_next = pl->names; + pl->names = ne; + return (0); + } + + /* + * If we have a valid config but cannot read any of these fields, then + * it means we have a half-initialized label. In vdev_label_init() + * we write a label with txg == 0 so that we can identify the device + * in case the user refers to the same disk later on. If we fail to + * create the pool, we'll be left with a label in this state + * which should not be considered part of a valid pool. 
+ */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &pool_guid) != 0 || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, + &vdev_guid) != 0 || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, + &top_guid) != 0 || + nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0) { + nvlist_free(config); + return (0); + } + + /* + * First, see if we know about this pool. If not, then add it to the + * list of known pools. + */ + for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { + if (pe->pe_guid == pool_guid) + break; + } + + if (pe == NULL) { + if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { + nvlist_free(config); + return (-1); + } + pe->pe_guid = pool_guid; + pe->pe_next = pl->pools; + pl->pools = pe; + } + + /* + * Second, see if we know about this toplevel vdev. Add it if its + * missing. + */ + for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { + if (ve->ve_guid == top_guid) + break; + } + + if (ve == NULL) { + if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { + nvlist_free(config); + return (-1); + } + ve->ve_guid = top_guid; + ve->ve_next = pe->pe_vdevs; + pe->pe_vdevs = ve; + } + + /* + * Third, see if we have a config with a matching transaction group. If + * so, then we do nothing. Otherwise, add it to the list of known + * configs. + */ + for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { + if (ce->ce_txg == txg) + break; + } + + if (ce == NULL) { + if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { + nvlist_free(config); + return (-1); + } + ce->ce_txg = txg; + ce->ce_config = config; + ce->ce_next = ve->ve_configs; + ve->ve_configs = ce; + } else { + nvlist_free(config); + } + + /* + * At this point we've successfully added our config to the list of + * known configs. The last thing to do is add the vdev guid -> path + * mappings so that we can fix up the configuration as necessary before + * doing the import. 
+ */ + if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) + return (-1); + + if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { + free(ne); + return (-1); + } + + ne->ne_guid = vdev_guid; + ne->ne_next = pl->names; + pl->names = ne; + + return (0); +} + +/* + * Returns true if the named pool matches the given GUID. + */ +static int +pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid, + boolean_t *isactive) +{ + zpool_handle_t *zhp; + uint64_t theguid; + + if (zpool_open_silent(hdl, name, &zhp) != 0) + return (-1); + + if (zhp == NULL) { + *isactive = B_FALSE; + return (0); + } + + verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, + &theguid) == 0); + + zpool_close(zhp); + + *isactive = (theguid == guid); + return (0); +} + +static nvlist_t * +refresh_config(libzfs_handle_t *hdl, nvlist_t *config) +{ + nvlist_t *nvl; + zfs_cmd_t zc = { 0 }; + int err; + + if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) + return (NULL); + + if (zcmd_alloc_dst_nvlist(hdl, &zc, + zc.zc_nvlist_conf_size * 2) != 0) { + zcmd_free_nvlists(&zc); + return (NULL); + } + + while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, + &zc)) != 0 && errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + zcmd_free_nvlists(&zc); + return (NULL); + } + } + + if (err) { + zcmd_free_nvlists(&zc); + return (NULL); + } + + if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) { + zcmd_free_nvlists(&zc); + return (NULL); + } + + zcmd_free_nvlists(&zc); + return (nvl); +} + +/* + * Determine if the vdev id is a hole in the namespace. + */ +boolean_t +vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) +{ + for (int c = 0; c < holes; c++) { + + /* Top-level is a hole */ + if (hole_array[c] == id) + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Convert our list of pools into the definitive set of configurations. We + * start by picking the best config for each toplevel vdev. 
Once that's done, + * we assemble the toplevel vdevs into a full config for the pool. We make a + * pass to fix up any incorrect paths, and then add it to the main list to + * return to the user. + */ +static nvlist_t * +get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) +{ + pool_entry_t *pe; + vdev_entry_t *ve; + config_entry_t *ce; + nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot; + nvlist_t **spares, **l2cache; + uint_t i, nspares, nl2cache; + boolean_t config_seen; + uint64_t best_txg; + char *name, *hostname; + uint64_t guid; + uint_t children = 0; + nvlist_t **child = NULL; + uint_t holes; + uint64_t *hole_array, max_id; + uint_t c; + boolean_t isactive; + uint64_t hostid; + nvlist_t *nvl; + boolean_t found_one = B_FALSE; + boolean_t valid_top_config = B_FALSE; + + if (nvlist_alloc(&ret, 0, 0) != 0) + goto nomem; + + for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { + uint64_t id, max_txg = 0; + + if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) + goto nomem; + config_seen = B_FALSE; + + /* + * Iterate over all toplevel vdevs. Grab the pool configuration + * from the first one we find, and then go through the rest and + * add them as necessary to the 'vdevs' member of the config. + */ + for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { + + /* + * Determine the best configuration for this vdev by + * selecting the config with the latest transaction + * group. + */ + best_txg = 0; + for (ce = ve->ve_configs; ce != NULL; + ce = ce->ce_next) { + + if (ce->ce_txg > best_txg) { + tmp = ce->ce_config; + best_txg = ce->ce_txg; + } + } + + /* + * We rely on the fact that the max txg for the + * pool will contain the most up-to-date information + * about the valid top-levels in the vdev namespace. 
+ */ + if (best_txg > max_txg) { + (void) nvlist_remove(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + DATA_TYPE_UINT64); + (void) nvlist_remove(config, + ZPOOL_CONFIG_HOLE_ARRAY, + DATA_TYPE_UINT64_ARRAY); + + max_txg = best_txg; + hole_array = NULL; + holes = 0; + max_id = 0; + valid_top_config = B_FALSE; + + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { + verify(nvlist_add_uint64(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + max_id) == 0); + valid_top_config = B_TRUE; + } + + if (nvlist_lookup_uint64_array(tmp, + ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, + &holes) == 0) { + verify(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_HOLE_ARRAY, + hole_array, holes) == 0); + } + } + + if (!config_seen) { + /* + * Copy the relevant pieces of data to the pool + * configuration: + * + * version + * pool guid + * name + * comment (if available) + * pool state + * hostid (if available) + * hostname (if available) + */ + uint64_t state, version; + char *comment = NULL; + + version = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VERSION); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_VERSION, version); + guid = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_GUID); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_GUID, guid); + name = fnvlist_lookup_string(tmp, + ZPOOL_CONFIG_POOL_NAME); + fnvlist_add_string(config, + ZPOOL_CONFIG_POOL_NAME, name); + + if (nvlist_lookup_string(tmp, + ZPOOL_CONFIG_COMMENT, &comment) == 0) + fnvlist_add_string(config, + ZPOOL_CONFIG_COMMENT, comment); + + state = fnvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_POOL_STATE); + fnvlist_add_uint64(config, + ZPOOL_CONFIG_POOL_STATE, state); + + hostid = 0; + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_HOSTID, &hostid) == 0) { + fnvlist_add_uint64(config, + ZPOOL_CONFIG_HOSTID, hostid); + hostname = fnvlist_lookup_string(tmp, + ZPOOL_CONFIG_HOSTNAME); + fnvlist_add_string(config, + ZPOOL_CONFIG_HOSTNAME, hostname); + } + + config_seen = B_TRUE; + } + + /* + * Add this top-level vdev to the child 
array. + */ + verify(nvlist_lookup_nvlist(tmp, + ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); + verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, + &id) == 0); + + if (id >= children) { + nvlist_t **newchild; + + newchild = zfs_alloc(hdl, (id + 1) * + sizeof (nvlist_t *)); + if (newchild == NULL) + goto nomem; + + for (c = 0; c < children; c++) + newchild[c] = child[c]; + + free(child); + child = newchild; + children = id + 1; + } + if (nvlist_dup(nvtop, &child[id], 0) != 0) + goto nomem; + + } + + /* + * If we have information about all the top-levels then + * clean up the nvlist which we've constructed. This + * means removing any extraneous devices that are + * beyond the valid range or adding devices to the end + * of our array which appear to be missing. + */ + if (valid_top_config) { + if (max_id < children) { + for (c = max_id; c < children; c++) + nvlist_free(child[c]); + children = max_id; + } else if (max_id > children) { + nvlist_t **newchild; + + newchild = zfs_alloc(hdl, (max_id) * + sizeof (nvlist_t *)); + if (newchild == NULL) + goto nomem; + + for (c = 0; c < children; c++) + newchild[c] = child[c]; + + free(child); + child = newchild; + children = max_id; + } + } + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + + /* + * The vdev namespace may contain holes as a result of + * device removal. We must add them back into the vdev + * tree before we process any missing devices. + */ + if (holes > 0) { + ASSERT(valid_top_config); + + for (c = 0; c < children; c++) { + nvlist_t *holey; + + if (child[c] != NULL || + !vdev_is_hole(hole_array, holes, c)) + continue; + + if (nvlist_alloc(&holey, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + + /* + * Holes in the namespace are treated as + * "hole" top-level vdevs and have a + * special flag set on them. 
+ */ + if (nvlist_add_string(holey, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_GUID, 0ULL) != 0) + goto nomem; + child[c] = holey; + } + } + + /* + * Look for any missing top-level vdevs. If this is the case, + * create a faked up 'missing' vdev as a placeholder. We cannot + * simply compress the child array, because the kernel performs + * certain checks to make sure the vdev IDs match their location + * in the configuration. + */ + for (c = 0; c < children; c++) { + if (child[c] == NULL) { + nvlist_t *missing; + if (nvlist_alloc(&missing, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + if (nvlist_add_string(missing, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_MISSING) != 0 || + nvlist_add_uint64(missing, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(missing, + ZPOOL_CONFIG_GUID, 0ULL) != 0) { + nvlist_free(missing); + goto nomem; + } + child[c] = missing; + } + } + + /* + * Put all of this pool's top-level vdevs into a root vdev. + */ + if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) + goto nomem; + if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) != 0 || + nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || + nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || + nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + child, children) != 0) { + nvlist_free(nvroot); + goto nomem; + } + + for (c = 0; c < children; c++) + nvlist_free(child[c]); + free(child); + children = 0; + child = NULL; + + /* + * Go through and fix up any paths and/or devids based on our + * known list of vdev GUID -> path mappings. + */ + if (fix_paths(nvroot, pl->names) != 0) { + nvlist_free(nvroot); + goto nomem; + } + + /* + * Add the root vdev to this pool's configuration. 
+ */ + if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + nvroot) != 0) { + nvlist_free(nvroot); + goto nomem; + } + nvlist_free(nvroot); + + /* + * zdb uses this path to report on active pools that were + * imported or created using -R. + */ + if (active_ok) + goto add_pool; + + /* + * Determine if this pool is currently active, in which case we + * can't actually import it. + */ + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + + if (pool_active(hdl, name, guid, &isactive) != 0) + goto error; + + if (isactive) { + nvlist_free(config); + config = NULL; + continue; + } + + if ((nvl = refresh_config(hdl, config)) == NULL) { + nvlist_free(config); + config = NULL; + continue; + } + + nvlist_free(config); + config = nvl; + + /* + * Go through and update the paths for spares, now that we have + * them. + */ + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + for (i = 0; i < nspares; i++) { + if (fix_paths(spares[i], pl->names) != 0) + goto nomem; + } + } + + /* + * Update the paths for l2cache devices. + */ + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + for (i = 0; i < nl2cache; i++) { + if (fix_paths(l2cache[i], pl->names) != 0) + goto nomem; + } + } + + /* + * Restore the original information read from the actual label. + */ + (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID, + DATA_TYPE_UINT64); + (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME, + DATA_TYPE_STRING); + if (hostid != 0) { + verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, + hostid) == 0); + verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, + hostname) == 0); + } + +add_pool: + /* + * Add this pool to the list of configs. 
+ */ + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + if (nvlist_add_nvlist(ret, name, config) != 0) + goto nomem; + + found_one = B_TRUE; + nvlist_free(config); + config = NULL; + } + + if (!found_one) { + nvlist_free(ret); + ret = NULL; + } + + return (ret); + +nomem: + (void) no_memory(hdl); +error: + nvlist_free(config); + nvlist_free(ret); + for (c = 0; c < children; c++) + nvlist_free(child[c]); + free(child); + + return (NULL); +} + +/* + * Return the offset of the given label. + */ +static uint64_t +label_offset(uint64_t size, int l) +{ + ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); + return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? + 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); +} + +/* + * Given a file descriptor, read the label information and return an nvlist + * describing the configuration, if there is one. + */ +int +zpool_read_label(int fd, nvlist_t **config) +{ + struct stat64 statbuf; + int l; + vdev_label_t *label; + uint64_t state, txg, size; + + *config = NULL; + + if (fstat64(fd, &statbuf) == -1) + return (0); + size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); + + if ((label = malloc(sizeof (vdev_label_t))) == NULL) + return (-1); + + for (l = 0; l < VDEV_LABELS; l++) { + if (pread64(fd, label, sizeof (vdev_label_t), + label_offset(size, l)) != sizeof (vdev_label_t)) + continue; + + if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, + sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) + continue; + + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, + &state) != 0 || state > POOL_STATE_L2CACHE) { + nvlist_free(*config); + continue; + } + + if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && + (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, + &txg) != 0 || txg == 0)) { + nvlist_free(*config); + continue; + } + + free(label); + return (0); + } + + free(label); + *config = NULL; + return (0); +} + +typedef struct rdsk_node { + 
char *rn_name; + int rn_dfd; + libzfs_handle_t *rn_hdl; + nvlist_t *rn_config; + avl_tree_t *rn_avl; + avl_node_t rn_node; + boolean_t rn_nozpool; +} rdsk_node_t; + +static int +slice_cache_compare(const void *arg1, const void *arg2) +{ + const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; + const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; + char *nm1slice, *nm2slice; + int rv; + + /* + * slices zero and two are the most likely to provide results, + * so put those first + */ + nm1slice = strstr(nm1, "s0"); + nm2slice = strstr(nm2, "s0"); + if (nm1slice && !nm2slice) { + return (-1); + } + if (!nm1slice && nm2slice) { + return (1); + } + nm1slice = strstr(nm1, "s2"); + nm2slice = strstr(nm2, "s2"); + if (nm1slice && !nm2slice) { + return (-1); + } + if (!nm1slice && nm2slice) { + return (1); + } + + rv = strcmp(nm1, nm2); + if (rv == 0) + return (0); + return (rv > 0 ? 1 : -1); +} + +#ifdef sun +static void +check_one_slice(avl_tree_t *r, char *diskname, uint_t partno, + diskaddr_t size, uint_t blksz) +{ + rdsk_node_t tmpnode; + rdsk_node_t *node; + char sname[MAXNAMELEN]; + + tmpnode.rn_name = &sname[0]; + (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u", + diskname, partno); + /* + * protect against division by zero for disk labels that + * contain a bogus sector size + */ + if (blksz == 0) + blksz = DEV_BSIZE; + /* too small to contain a zpool? 
*/ + if ((size < (SPA_MINDEVSIZE / blksz)) && + (node = avl_find(r, &tmpnode, NULL))) + node->rn_nozpool = B_TRUE; +} +#endif /* sun */ + +static void +nozpool_all_slices(avl_tree_t *r, const char *sname) +{ +#ifdef sun + char diskname[MAXNAMELEN]; + char *ptr; + int i; + + (void) strncpy(diskname, sname, MAXNAMELEN); + if (((ptr = strrchr(diskname, 's')) == NULL) && + ((ptr = strrchr(diskname, 'p')) == NULL)) + return; + ptr[0] = 's'; + ptr[1] = '\0'; + for (i = 0; i < NDKMAP; i++) + check_one_slice(r, diskname, i, 0, 1); + ptr[0] = 'p'; + for (i = 0; i <= FD_NUMPART; i++) + check_one_slice(r, diskname, i, 0, 1); +#endif /* sun */ +} + +#ifdef sun +static void +check_slices(avl_tree_t *r, int fd, const char *sname) +{ + struct extvtoc vtoc; + struct dk_gpt *gpt; + char diskname[MAXNAMELEN]; + char *ptr; + int i; + + (void) strncpy(diskname, sname, MAXNAMELEN); + if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1])) + return; + ptr[1] = '\0'; + + if (read_extvtoc(fd, &vtoc) >= 0) { + for (i = 0; i < NDKMAP; i++) + check_one_slice(r, diskname, i, + vtoc.v_part[i].p_size, vtoc.v_sectorsz); + } else if (efi_alloc_and_read(fd, &gpt) >= 0) { + /* + * on x86 we'll still have leftover links that point + * to slices s[9-15], so use NDKMAP instead + */ + for (i = 0; i < NDKMAP; i++) + check_one_slice(r, diskname, i, + gpt->efi_parts[i].p_size, gpt->efi_lbasize); + /* nodes p[1-4] are never used with EFI labels */ + ptr[0] = 'p'; + for (i = 1; i <= FD_NUMPART; i++) + check_one_slice(r, diskname, i, 0, 1); + efi_free(gpt); + } +} +#endif /* sun */ + +static void +zpool_open_func(void *arg) +{ + rdsk_node_t *rn = arg; + struct stat64 statbuf; + nvlist_t *config; + int fd; + + if (rn->rn_nozpool) + return; + if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { + /* symlink to a device that's no longer there */ + if (errno == ENOENT) + nozpool_all_slices(rn->rn_avl, rn->rn_name); + return; + } + /* + * Ignore failed stats. 
We only want regular + * files, character devs and block devs. + */ + if (fstat64(fd, &statbuf) != 0 || + (!S_ISREG(statbuf.st_mode) && + !S_ISCHR(statbuf.st_mode) && + !S_ISBLK(statbuf.st_mode))) { + (void) close(fd); + return; + } + /* this file is too small to hold a zpool */ +#ifdef sun + if (S_ISREG(statbuf.st_mode) && + statbuf.st_size < SPA_MINDEVSIZE) { + (void) close(fd); + return; + } else if (!S_ISREG(statbuf.st_mode)) { + /* + * Try to read the disk label first so we don't have to + * open a bunch of minor nodes that can't have a zpool. + */ + check_slices(rn->rn_avl, fd, rn->rn_name); + } +#else /* !sun */ + if (statbuf.st_size < SPA_MINDEVSIZE) { + (void) close(fd); + return; + } +#endif /* sun */ + + if ((zpool_read_label(fd, &config)) != 0) { + (void) close(fd); + (void) no_memory(rn->rn_hdl); + return; + } + (void) close(fd); + + + rn->rn_config = config; + if (config != NULL) { + assert(rn->rn_nozpool == B_FALSE); + } +} + +/* + * Given a file descriptor, clear (zero) the label information. This function + * is used in the appliance stack as part of the ZFS sysevent module and + * to implement the "zpool labelclear" command. + */ +int +zpool_clear_label(int fd) +{ + struct stat64 statbuf; + int l; + vdev_label_t *label; + uint64_t size; + + if (fstat64(fd, &statbuf) == -1) + return (0); + size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); + + if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) + return (-1); + + for (l = 0; l < VDEV_LABELS; l++) { + if (pwrite64(fd, label, sizeof (vdev_label_t), + label_offset(size, l)) != sizeof (vdev_label_t)) + return (-1); + } + + free(label); + return (0); +} + +/* + * Given a list of directories to search, find all pools stored on disk. This + * includes partial pools which are not available to import. If no args are + * given (argc is 0), then the default directory (/dev/dsk) is searched. 
+ * poolname or guid (but not both) are provided by the caller when trying + * to import a specific pool. + */ +static nvlist_t * +zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) +{ + int i, dirs = iarg->paths; + DIR *dirp = NULL; + struct dirent64 *dp; + char path[MAXPATHLEN]; + char *end, **dir = iarg->path; + size_t pathleft; + nvlist_t *ret = NULL; + static char *default_dir = "/dev"; + pool_list_t pools = { 0 }; + pool_entry_t *pe, *penext; + vdev_entry_t *ve, *venext; + config_entry_t *ce, *cenext; + name_entry_t *ne, *nenext; + avl_tree_t slice_cache; + rdsk_node_t *slice; + void *cookie; + + if (dirs == 0) { + dirs = 1; + dir = &default_dir; + } + + /* + * Go through and read the label configuration information from every + * possible device, organizing the information according to pool GUID + * and toplevel GUID. + */ + for (i = 0; i < dirs; i++) { + tpool_t *t; + char *rdsk; + int dfd; + + /* use realpath to normalize the path */ + if (realpath(dir[i], path) == 0) { + (void) zfs_error_fmt(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]); + goto error; + } + end = &path[strlen(path)]; + *end++ = '/'; + *end = 0; + pathleft = &path[sizeof (path)] - end; + + /* + * Using raw devices instead of block devices when we're + * reading the labels skips a bunch of slow operations during + * close(2) processing, so we replace /dev/dsk with /dev/rdsk. 
+ */ + if (strcmp(path, "/dev/dsk/") == 0) + rdsk = "/dev/"; + else + rdsk = path; + + if ((dfd = open64(rdsk, O_RDONLY)) < 0 || + (dirp = fdopendir(dfd)) == NULL) { + zfs_error_aux(hdl, strerror(errno)); + (void) zfs_error_fmt(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), + rdsk); + goto error; + } + + avl_create(&slice_cache, slice_cache_compare, + sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); + + if (strcmp(rdsk, "/dev/") == 0) { + struct gmesh mesh; + struct gclass *mp; + struct ggeom *gp; + struct gprovider *pp; + + errno = geom_gettree(&mesh); + if (errno != 0) { + zfs_error_aux(hdl, strerror(errno)); + (void) zfs_error_fmt(hdl, EZFS_BADPATH, + dgettext(TEXT_DOMAIN, "cannot get GEOM tree")); + goto error; + } + + LIST_FOREACH(mp, &mesh.lg_class, lg_class) { + LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zfs_strdup(hdl, pp->lg_name); + slice->rn_avl = &slice_cache; + slice->rn_dfd = dfd; + slice->rn_hdl = hdl; + slice->rn_nozpool = B_FALSE; + avl_add(&slice_cache, slice); + } + } + } + + geom_deletetree(&mesh); + goto skipdir; + } + + /* + * This is not MT-safe, but we have no MT consumers of libzfs + */ + while ((dp = readdir64(dirp)) != NULL) { + const char *name = dp->d_name; + if (name[0] == '.' && + (name[1] == 0 || (name[1] == '.' && name[2] == 0))) + continue; + + slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zfs_strdup(hdl, name); + slice->rn_avl = &slice_cache; + slice->rn_dfd = dfd; + slice->rn_hdl = hdl; + slice->rn_nozpool = B_FALSE; + avl_add(&slice_cache, slice); + } +skipdir: + /* + * create a thread pool to do all of this in parallel; + * rn_nozpool is not protected, so this is racy in that + * multiple tasks could decide that the same slice can + * not hold a zpool, which is benign. 
Also choose + * double the number of processors; we hold a lot of + * locks in the kernel, so going beyond this doesn't + * buy us much. + */ + t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), + 0, NULL); + for (slice = avl_first(&slice_cache); slice; + (slice = avl_walk(&slice_cache, slice, + AVL_AFTER))) + (void) tpool_dispatch(t, zpool_open_func, slice); + tpool_wait(t); + tpool_destroy(t); + + cookie = NULL; + while ((slice = avl_destroy_nodes(&slice_cache, + &cookie)) != NULL) { + if (slice->rn_config != NULL) { + nvlist_t *config = slice->rn_config; + boolean_t matched = B_TRUE; + + if (iarg->poolname != NULL) { + char *pname; + + matched = nvlist_lookup_string(config, + ZPOOL_CONFIG_POOL_NAME, + &pname) == 0 && + strcmp(iarg->poolname, pname) == 0; + } else if (iarg->guid != 0) { + uint64_t this_guid; + + matched = nvlist_lookup_uint64(config, + ZPOOL_CONFIG_POOL_GUID, + &this_guid) == 0 && + iarg->guid == this_guid; + } + if (!matched) { + nvlist_free(config); + config = NULL; + continue; + } + /* use the non-raw path for the config */ + (void) strlcpy(end, slice->rn_name, pathleft); + if (add_config(hdl, &pools, path, config) != 0) + goto error; + } + free(slice->rn_name); + free(slice); + } + avl_destroy(&slice_cache); + + (void) closedir(dirp); + dirp = NULL; + } + + ret = get_configs(hdl, &pools, iarg->can_be_active); + +error: + for (pe = pools.pools; pe != NULL; pe = penext) { + penext = pe->pe_next; + for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { + venext = ve->ve_next; + for (ce = ve->ve_configs; ce != NULL; ce = cenext) { + cenext = ce->ce_next; + if (ce->ce_config) + nvlist_free(ce->ce_config); + free(ce); + } + free(ve); + } + free(pe); + } + + for (ne = pools.names; ne != NULL; ne = nenext) { + nenext = ne->ne_next; + if (ne->ne_name) + free(ne->ne_name); + free(ne); + } + + if (dirp) + (void) closedir(dirp); + + return (ret); +} + +nvlist_t * +zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) +{ + importargs_t iarg = { 0 
}; + + iarg.paths = argc; + iarg.path = argv; + + return (zpool_find_import_impl(hdl, &iarg)); +} + +/* + * Given a cache file, return the contents as a list of importable pools. + * poolname or guid (but not both) are provided by the caller when trying + * to import a specific pool. + */ +nvlist_t * +zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile, + char *poolname, uint64_t guid) +{ + char *buf; + int fd; + struct stat64 statbuf; + nvlist_t *raw, *src, *dst; + nvlist_t *pools; + nvpair_t *elem; + char *name; + uint64_t this_guid; + boolean_t active; + + verify(poolname == NULL || guid == 0); + + if ((fd = open(cachefile, O_RDONLY)) < 0) { + zfs_error_aux(hdl, "%s", strerror(errno)); + (void) zfs_error(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "failed to open cache file")); + return (NULL); + } + + if (fstat64(fd, &statbuf) != 0) { + zfs_error_aux(hdl, "%s", strerror(errno)); + (void) close(fd); + (void) zfs_error(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "failed to get size of cache file")); + return (NULL); + } + + if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) { + (void) close(fd); + return (NULL); + } + + if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { + (void) close(fd); + free(buf); + (void) zfs_error(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, + "failed to read cache file contents")); + return (NULL); + } + + (void) close(fd); + + if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { + free(buf); + (void) zfs_error(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, + "invalid or corrupt cache file contents")); + return (NULL); + } + + free(buf); + + /* + * Go through and get the current state of the pools and refresh their + * state. 
+ */ + if (nvlist_alloc(&pools, 0, 0) != 0) { + (void) no_memory(hdl); + nvlist_free(raw); + return (NULL); + } + + elem = NULL; + while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) { + src = fnvpair_value_nvlist(elem); + + name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME); + if (poolname != NULL && strcmp(poolname, name) != 0) + continue; + + this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID); + if (guid != 0 && guid != this_guid) + continue; + + if (pool_active(hdl, name, this_guid, &active) != 0) { + nvlist_free(raw); + nvlist_free(pools); + return (NULL); + } + + if (active) + continue; + + if ((dst = refresh_config(hdl, src)) == NULL) { + nvlist_free(raw); + nvlist_free(pools); + return (NULL); + } + + if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { + (void) no_memory(hdl); + nvlist_free(dst); + nvlist_free(raw); + nvlist_free(pools); + return (NULL); + } + nvlist_free(dst); + } + + nvlist_free(raw); + return (pools); +} + +static int +name_or_guid_exists(zpool_handle_t *zhp, void *data) +{ + importargs_t *import = data; + int found = 0; + + if (import->poolname != NULL) { + char *pool_name; + + verify(nvlist_lookup_string(zhp->zpool_config, + ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); + if (strcmp(pool_name, import->poolname) == 0) + found = 1; + } else { + uint64_t pool_guid; + + verify(nvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); + if (pool_guid == import->guid) + found = 1; + } + + zpool_close(zhp); + return (found); +} + +nvlist_t * +zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) +{ + verify(import->poolname == NULL || import->guid == 0); + + if (import->unique) + import->exists = zpool_iter(hdl, name_or_guid_exists, import); + + if (import->cachefile != NULL) + return (zpool_find_import_cached(hdl, import->cachefile, + import->poolname, import->guid)); + + return (zpool_find_import_impl(hdl, import)); +} + +boolean_t +find_guid(nvlist_t *nv, uint64_t guid) +{ + 
uint64_t tmp; + nvlist_t **child; + uint_t c, children; + + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); + if (tmp == guid) + return (B_TRUE); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (find_guid(child[c], guid)) + return (B_TRUE); + } + + return (B_FALSE); +} + +typedef struct aux_cbdata { + const char *cb_type; + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} aux_cbdata_t; + +static int +find_aux(zpool_handle_t *zhp, void *data) +{ + aux_cbdata_t *cbp = data; + nvlist_t **list; + uint_t i, count; + uint64_t guid; + nvlist_t *nvroot; + + verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type, + &list, &count) == 0) { + for (i = 0; i < count; i++) { + verify(nvlist_lookup_uint64(list[i], + ZPOOL_CONFIG_GUID, &guid) == 0); + if (guid == cbp->cb_guid) { + cbp->cb_zhp = zhp; + return (1); + } + } + } + + zpool_close(zhp); + return (0); +} + +/* + * Determines if the pool is in use. If so, it returns true and the state of + * the pool as well as the name of the pool. Both strings are allocated and + * must be freed by the caller. 
+ */ +int +zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, + boolean_t *inuse) +{ + nvlist_t *config; + char *name; + boolean_t ret; + uint64_t guid, vdev_guid; + zpool_handle_t *zhp; + nvlist_t *pool_config; + uint64_t stateval, isspare; + aux_cbdata_t cb = { 0 }; + boolean_t isactive; + + *inuse = B_FALSE; + + if (zpool_read_label(fd, &config) != 0) { + (void) no_memory(hdl); + return (-1); + } + + if (config == NULL) + return (0); + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &stateval) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, + &vdev_guid) == 0); + + if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) { + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &guid) == 0); + } + + switch (stateval) { + case POOL_STATE_EXPORTED: + /* + * A pool with an exported state may in fact be imported + * read-only, so check the in-core state to see if it's + * active and imported read-only. If it is, set + * its state to active. + */ + if (pool_active(hdl, name, guid, &isactive) == 0 && isactive && + (zhp = zpool_open_canfail(hdl, name)) != NULL) { + if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL)) + stateval = POOL_STATE_ACTIVE; + + /* + * All we needed the zpool handle for is the + * readonly prop check. + */ + zpool_close(zhp); + } + + ret = B_TRUE; + break; + + case POOL_STATE_ACTIVE: + /* + * For an active pool, we have to determine if it's really part + * of a currently active pool (in which case the pool will exist + * and the guid will be the same), or whether it's part of an + * active pool that was disconnected without being explicitly + * exported. 
+ */ + if (pool_active(hdl, name, guid, &isactive) != 0) { + nvlist_free(config); + return (-1); + } + + if (isactive) { + /* + * Because the device may have been removed while + * offlined, we only report it as active if the vdev is + * still present in the config. Otherwise, pretend like + * it's not in use. + */ + if ((zhp = zpool_open_canfail(hdl, name)) != NULL && + (pool_config = zpool_get_config(zhp, NULL)) + != NULL) { + nvlist_t *nvroot; + + verify(nvlist_lookup_nvlist(pool_config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + ret = find_guid(nvroot, vdev_guid); + } else { + ret = B_FALSE; + } + + /* + * If this is an active spare within another pool, we + * treat it like an unused hot spare. This allows the + * user to create a pool with a hot spare that currently + * in use within another pool. Since we return B_TRUE, + * libdiskmgt will continue to prevent generic consumers + * from using the device. + */ + if (ret && nvlist_lookup_uint64(config, + ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) + stateval = POOL_STATE_SPARE; + + if (zhp != NULL) + zpool_close(zhp); + } else { + stateval = POOL_STATE_POTENTIALLY_ACTIVE; + ret = B_TRUE; + } + break; + + case POOL_STATE_SPARE: + /* + * For a hot spare, it can be either definitively in use, or + * potentially active. To determine if it's in use, we iterate + * over all pools in the system and search for one with a spare + * with a matching guid. + * + * Due to the shared nature of spares, we don't actually report + * the potentially active case as in use. This means the user + * can freely create pools on the hot spares of exported pools, + * but to do otherwise makes the resulting code complicated, and + * we end up having to deal with this case anyway. 
+ */ + cb.cb_zhp = NULL; + cb.cb_guid = vdev_guid; + cb.cb_type = ZPOOL_CONFIG_SPARES; + if (zpool_iter(hdl, find_aux, &cb) == 1) { + name = (char *)zpool_get_name(cb.cb_zhp); + ret = TRUE; + } else { + ret = FALSE; + } + break; + + case POOL_STATE_L2CACHE: + + /* + * Check if any pool is currently using this l2cache device. + */ + cb.cb_zhp = NULL; + cb.cb_guid = vdev_guid; + cb.cb_type = ZPOOL_CONFIG_L2CACHE; + if (zpool_iter(hdl, find_aux, &cb) == 1) { + name = (char *)zpool_get_name(cb.cb_zhp); + ret = TRUE; + } else { + ret = FALSE; + } + break; + + default: + ret = B_FALSE; + } + + + if (ret) { + if ((*namestr = zfs_strdup(hdl, name)) == NULL) { + if (cb.cb_zhp) + zpool_close(cb.cb_zhp); + nvlist_free(config); + return (-1); + } + *state = (pool_state_t)stateval; + } + + if (cb.cb_zhp) + zpool_close(cb.cb_zhp); + + nvlist_free(config); + *inuse = ret; + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c new file mode 100644 index 0000000..9698a72 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c @@ -0,0 +1,529 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>. + * All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <libintl.h> +#include <libzfs.h> + +#include "libzfs_impl.h" + +int +zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + nvlist_t *nvl = zfs_get_clones_nvl(zhp); + nvpair_t *pair; + + if (nvl == NULL) + return (0); + + for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) { + zfs_handle_t *clone = zfs_open(zhp->zfs_hdl, nvpair_name(pair), + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (clone != NULL) { + int err = func(clone, data); + if (err != 0) + return (err); + } + } + return (0); +} + +static int +zfs_do_list_ioctl(zfs_handle_t *zhp, unsigned long arg, zfs_cmd_t *zc) +{ + int rc; + uint64_t orig_cookie; + + orig_cookie = zc->zc_cookie; +top: + (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); + rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); + + if (rc == -1) { + switch (errno) { + case ENOMEM: + /* expand nvlist memory and try again */ + if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { + zcmd_free_nvlists(zc); + return (-1); + } + zc->zc_cookie = orig_cookie; + goto top; + /* + * An errno value of ESRCH indicates normal completion. + * If ENOENT is returned, then the underlying dataset + * has been removed since we obtained the handle. 
+ */ + case ESRCH: + case ENOENT: + rc = 1; + break; + default: + rc = zfs_standard_error(zhp->zfs_hdl, errno, + dgettext(TEXT_DOMAIN, + "cannot iterate filesystems")); + break; + } + } + return (rc); +} + +/* + * Iterate over all child filesystems + */ +int +zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + zfs_cmd_t zc = { 0 }; + zfs_handle_t *nzhp; + int ret; + + if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) + return (0); + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, + &zc)) == 0) { + /* + * Silently ignore errors, as the only plausible explanation is + * that the pool has since been removed. + */ + if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, + &zc)) == NULL) { + continue; + } + + if ((ret = func(nzhp, data)) != 0) { + zcmd_free_nvlists(&zc); + return (ret); + } + } + zcmd_free_nvlists(&zc); + return ((ret < 0) ? ret : 0); +} + +/* + * Iterate over all snapshots + */ +int +zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, + void *data) +{ + zfs_cmd_t zc = { 0 }; + zfs_handle_t *nzhp; + int ret; + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT || + zhp->zfs_type == ZFS_TYPE_BOOKMARK) + return (0); + + zc.zc_simple = simple; + + if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) + return (-1); + while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, + &zc)) == 0) { + + if (simple) + nzhp = make_dataset_simple_handle_zc(zhp, &zc); + else + nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); + if (nzhp == NULL) + continue; + + if ((ret = func(nzhp, data)) != 0) { + zcmd_free_nvlists(&zc); + return (ret); + } + } + zcmd_free_nvlists(&zc); + return ((ret < 0) ? 
ret : 0); +} + +/* + * Iterate over all bookmarks + */ +int +zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + zfs_handle_t *nzhp; + nvlist_t *props = NULL; + nvlist_t *bmarks = NULL; + int err; + + if ((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | ZFS_TYPE_BOOKMARK)) != 0) + return (0); + + /* Setup the requested properties nvlist. */ + props = fnvlist_alloc(); + fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_GUID)); + fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATETXG)); + fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATION)); + + /* Allocate an nvlist to hold the bookmarks. */ + bmarks = fnvlist_alloc(); + + if ((err = lzc_get_bookmarks(zhp->zfs_name, props, &bmarks)) != 0) + goto out; + + for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL); + pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) { + char name[ZFS_MAXNAMELEN]; + char *bmark_name; + nvlist_t *bmark_props; + + bmark_name = nvpair_name(pair); + bmark_props = fnvpair_value_nvlist(pair); + + (void) snprintf(name, sizeof (name), "%s#%s", zhp->zfs_name, + bmark_name); + + nzhp = make_bookmark_handle(zhp, name, bmark_props); + if (nzhp == NULL) + continue; + + if ((err = func(nzhp, data)) != 0) + goto out; + } + +out: + fnvlist_free(props); + fnvlist_free(bmarks); + + return (err); +} + +/* + * Routines for dealing with the sorted snapshot functionality + */ +typedef struct zfs_node { + zfs_handle_t *zn_handle; + avl_node_t zn_avlnode; +} zfs_node_t; + +static int +zfs_sort_snaps(zfs_handle_t *zhp, void *data) +{ + avl_tree_t *avl = data; + zfs_node_t *node; + zfs_node_t search; + + search.zn_handle = zhp; + node = avl_find(avl, &search, NULL); + if (node) { + /* + * If this snapshot was renamed while we were creating the + * AVL tree, it's possible that we already inserted it under + * its old name. Remove the old handle before adding the new + * one. 
+ */ + zfs_close(node->zn_handle); + avl_remove(avl, node); + free(node); + } + + node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); + node->zn_handle = zhp; + avl_add(avl, node); + + return (0); +} + +static int +zfs_snapshot_compare(const void *larg, const void *rarg) +{ + zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; + zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; + uint64_t lcreate, rcreate; + + /* + * Sort them according to creation time. We use the hidden + * CREATETXG property to get an absolute ordering of snapshots. + */ + lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); + rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); + + if (lcreate < rcreate) + return (-1); + else if (lcreate > rcreate) + return (+1); + else + return (0); +} + +int +zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) +{ + int ret = 0; + zfs_node_t *node; + avl_tree_t avl; + void *cookie = NULL; + + avl_create(&avl, zfs_snapshot_compare, + sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); + + ret = zfs_iter_snapshots(zhp, B_FALSE, zfs_sort_snaps, &avl); + + for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) + ret |= callback(node->zn_handle, data); + + while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) + free(node); + + avl_destroy(&avl); + + return (ret); +} + +typedef struct { + char *ssa_first; + char *ssa_last; + boolean_t ssa_seenfirst; + boolean_t ssa_seenlast; + zfs_iter_f ssa_func; + void *ssa_arg; +} snapspec_arg_t; + +static int +snapspec_cb(zfs_handle_t *zhp, void *arg) { + snapspec_arg_t *ssa = arg; + char *shortsnapname; + int err = 0; + + if (ssa->ssa_seenlast) + return (0); + shortsnapname = zfs_strdup(zhp->zfs_hdl, + strchr(zfs_get_name(zhp), '@') + 1); + + if (!ssa->ssa_seenfirst && strcmp(shortsnapname, ssa->ssa_first) == 0) + ssa->ssa_seenfirst = B_TRUE; + + if (ssa->ssa_seenfirst) { + err = ssa->ssa_func(zhp, ssa->ssa_arg); + } else { + zfs_close(zhp); + } + + if (strcmp(shortsnapname, 
ssa->ssa_last) == 0) + ssa->ssa_seenlast = B_TRUE; + free(shortsnapname); + + return (err); +} + +/* + * spec is a string like "A,B%C,D" + * + * <snaps>, where <snaps> can be: + * <snap> (single snapshot) + * <snap>%<snap> (range of snapshots, inclusive) + * %<snap> (range of snapshots, starting with earliest) + * <snap>% (range of snapshots, ending with last) + * % (all snapshots) + * <snaps>[,...] (comma separated list of the above) + * + * If a snapshot can not be opened, continue trying to open the others, but + * return ENOENT at the end. + */ +int +zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, + zfs_iter_f func, void *arg) +{ + char *buf, *comma_separated, *cp; + int err = 0; + int ret = 0; + + buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig); + cp = buf; + + while ((comma_separated = strsep(&cp, ",")) != NULL) { + char *pct = strchr(comma_separated, '%'); + if (pct != NULL) { + snapspec_arg_t ssa = { 0 }; + ssa.ssa_func = func; + ssa.ssa_arg = arg; + + if (pct == comma_separated) + ssa.ssa_seenfirst = B_TRUE; + else + ssa.ssa_first = comma_separated; + *pct = '\0'; + ssa.ssa_last = pct + 1; + + /* + * If there is a lastname specified, make sure it + * exists. 
+ */ + if (ssa.ssa_last[0] != '\0') { + char snapname[ZFS_MAXNAMELEN]; + (void) snprintf(snapname, sizeof (snapname), + "%s@%s", zfs_get_name(fs_zhp), + ssa.ssa_last); + if (!zfs_dataset_exists(fs_zhp->zfs_hdl, + snapname, ZFS_TYPE_SNAPSHOT)) { + ret = ENOENT; + continue; + } + } + + err = zfs_iter_snapshots_sorted(fs_zhp, + snapspec_cb, &ssa); + if (ret == 0) + ret = err; + if (ret == 0 && (!ssa.ssa_seenfirst || + (ssa.ssa_last[0] != '\0' && !ssa.ssa_seenlast))) { + ret = ENOENT; + } + } else { + char snapname[ZFS_MAXNAMELEN]; + zfs_handle_t *snap_zhp; + (void) snprintf(snapname, sizeof (snapname), "%s@%s", + zfs_get_name(fs_zhp), comma_separated); + snap_zhp = make_dataset_handle(fs_zhp->zfs_hdl, + snapname); + if (snap_zhp == NULL) { + ret = ENOENT; + continue; + } + err = func(snap_zhp, arg); + if (ret == 0) + ret = err; + } + } + + free(buf); + return (ret); +} + +/* + * Iterate over all children, snapshots and filesystems + */ +int +zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) +{ + int ret; + + if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0) + return (ret); + + return (zfs_iter_snapshots(zhp, B_FALSE, func, data)); +} + + +typedef struct iter_stack_frame { + struct iter_stack_frame *next; + zfs_handle_t *zhp; +} iter_stack_frame_t; + +typedef struct iter_dependents_arg { + boolean_t first; + boolean_t allowrecursion; + iter_stack_frame_t *stack; + zfs_iter_f func; + void *data; +} iter_dependents_arg_t; + +static int +iter_dependents_cb(zfs_handle_t *zhp, void *arg) +{ + iter_dependents_arg_t *ida = arg; + int err = 0; + boolean_t first = ida->first; + ida->first = B_FALSE; + + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { + err = zfs_iter_clones(zhp, iter_dependents_cb, ida); + } else if (zhp->zfs_type != ZFS_TYPE_BOOKMARK) { + iter_stack_frame_t isf; + iter_stack_frame_t *f; + + /* + * check if there is a cycle by seeing if this fs is already + * on the stack. 
+ */ + for (f = ida->stack; f != NULL; f = f->next) { + if (f->zhp->zfs_dmustats.dds_guid == + zhp->zfs_dmustats.dds_guid) { + if (ida->allowrecursion) { + zfs_close(zhp); + return (0); + } else { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, + "recursive dependency at '%s'"), + zfs_get_name(zhp)); + err = zfs_error(zhp->zfs_hdl, + EZFS_RECURSIVE, + dgettext(TEXT_DOMAIN, + "cannot determine dependent " + "datasets")); + zfs_close(zhp); + return (err); + } + } + } + + isf.zhp = zhp; + isf.next = ida->stack; + ida->stack = &isf; + err = zfs_iter_filesystems(zhp, iter_dependents_cb, ida); + if (err == 0) { + err = zfs_iter_snapshots(zhp, B_FALSE, + iter_dependents_cb, ida); + } + ida->stack = isf.next; + } + + if (!first && err == 0) + err = ida->func(zhp, ida->data); + else + zfs_close(zhp); + + return (err); +} + +int +zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, + zfs_iter_f func, void *data) +{ + iter_dependents_arg_t ida; + ida.allowrecursion = allowrecursion; + ida.stack = NULL; + ida.func = func; + ida.data = data; + ida.first = B_TRUE; + return (iter_dependents_cb(zfs_handle_dup(zhp), &ida)); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c new file mode 100644 index 0000000..f8596ed --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c @@ -0,0 +1,1325 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014 by Delphix. All rights reserved. + */ + +/* + * Routines to manage ZFS mounts. We separate all the nasty routines that have + * to deal with the OS. The following functions are the main entry points -- + * they are used by mount and unmount and when changing a filesystem's + * mountpoint. + * + * zfs_is_mounted() + * zfs_mount() + * zfs_unmount() + * zfs_unmountall() + * + * This file also contains the functions used to manage sharing filesystems via + * NFS and iSCSI: + * + * zfs_is_shared() + * zfs_share() + * zfs_unshare() + * + * zfs_is_shared_nfs() + * zfs_is_shared_smb() + * zfs_share_proto() + * zfs_shareall(); + * zfs_unshare_nfs() + * zfs_unshare_smb() + * zfs_unshareall_nfs() + * zfs_unshareall_smb() + * zfs_unshareall() + * zfs_unshareall_bypath() + * + * The following functions are available for pool consumers, and will + * mount/unmount and share/unshare all datasets within pool: + * + * zpool_enable_datasets() + * zpool_disable_datasets() + */ + +#include <dirent.h> +#include <dlfcn.h> +#include <errno.h> +#include <libgen.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <zone.h> +#include <sys/mntent.h> +#include <sys/mount.h> +#include <sys/stat.h> + +#include <libzfs.h> + +#include "libzfs_impl.h" + +#include <libshare.h> +#define MAXISALEN 257 /* based on sysinfo(2) man page */ + +static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); +zfs_share_type_t zfs_is_shared_proto(zfs_handle_t 
*, char **, + zfs_share_proto_t); + +/* + * The share protocols table must be in the same order as the zfs_share_prot_t + * enum in libzfs_impl.h + */ +typedef struct { + zfs_prop_t p_prop; + char *p_name; + int p_share_err; + int p_unshare_err; +} proto_table_t; + +proto_table_t proto_table[PROTO_END] = { + {ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED}, + {ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED}, +}; + +zfs_share_proto_t nfs_only[] = { + PROTO_NFS, + PROTO_END +}; + +zfs_share_proto_t smb_only[] = { + PROTO_SMB, + PROTO_END +}; +zfs_share_proto_t share_all_proto[] = { + PROTO_NFS, + PROTO_SMB, + PROTO_END +}; + +/* + * Search the sharetab for the given mountpoint and protocol, returning + * a zfs_share_type_t value. + */ +static zfs_share_type_t +is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto) +{ + char buf[MAXPATHLEN], *tab; + char *ptr; + + if (hdl->libzfs_sharetab == NULL) + return (SHARED_NOT_SHARED); + + (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET); + + while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) { + + /* the mountpoint is the first entry on each line */ + if ((tab = strchr(buf, '\t')) == NULL) + continue; + + *tab = '\0'; + if (strcmp(buf, mountpoint) == 0) { +#ifdef sun + /* + * the protocol field is the third field + * skip over second field + */ + ptr = ++tab; + if ((tab = strchr(ptr, '\t')) == NULL) + continue; + ptr = ++tab; + if ((tab = strchr(ptr, '\t')) == NULL) + continue; + *tab = '\0'; + if (strcmp(ptr, + proto_table[proto].p_name) == 0) { + switch (proto) { + case PROTO_NFS: + return (SHARED_NFS); + case PROTO_SMB: + return (SHARED_SMB); + default: + return (0); + } + } +#else + if (proto == PROTO_NFS) + return (SHARED_NFS); +#endif + } + } + + return (SHARED_NOT_SHARED); +} + +#ifdef sun +/* + * Returns true if the specified directory is empty. 
If we can't open the + * directory at all, return true so that the mount can fail with a more + * informative error message. + */ +static boolean_t +dir_is_empty(const char *dirname) +{ + DIR *dirp; + struct dirent64 *dp; + + if ((dirp = opendir(dirname)) == NULL) + return (B_TRUE); + + while ((dp = readdir64(dirp)) != NULL) { + + if (strcmp(dp->d_name, ".") == 0 || + strcmp(dp->d_name, "..") == 0) + continue; + + (void) closedir(dirp); + return (B_FALSE); + } + + (void) closedir(dirp); + return (B_TRUE); +} +#endif + +/* + * Checks to see if the mount is active. If the filesystem is mounted, we fill + * in 'where' with the current mountpoint, and return 1. Otherwise, we return + * 0. + */ +boolean_t +is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where) +{ + struct mnttab entry; + + if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0) + return (B_FALSE); + + if (where != NULL) + *where = zfs_strdup(zfs_hdl, entry.mnt_mountp); + + return (B_TRUE); +} + +boolean_t +zfs_is_mounted(zfs_handle_t *zhp, char **where) +{ + return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where)); +} + +/* + * Returns true if the given dataset is mountable, false otherwise. Returns the + * mountpoint in 'buf'. 
+ */ +static boolean_t +zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen, + zprop_source_t *source) +{ + char sourceloc[ZFS_MAXNAMELEN]; + zprop_source_t sourcetype; + + if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type)) + return (B_FALSE); + + verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen, + &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0); + + if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 || + strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0) + return (B_FALSE); + + if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF) + return (B_FALSE); + + if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && + getzoneid() == GLOBAL_ZONEID) + return (B_FALSE); + + if (source) + *source = sourcetype; + + return (B_TRUE); +} + +/* + * Mount the given filesystem. + */ +int +zfs_mount(zfs_handle_t *zhp, const char *options, int flags) +{ + struct stat buf; + char mountpoint[ZFS_MAXPROPLEN]; + char mntopts[MNT_LINE_MAX]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + + if (options == NULL) + mntopts[0] = '\0'; + else + (void) strlcpy(mntopts, options, sizeof (mntopts)); + + /* + * If the pool is imported read-only then all mounts must be read-only + */ + if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL)) + flags |= MS_RDONLY; + + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) + return (0); + + /* Create the directory if it doesn't already exist */ + if (lstat(mountpoint, &buf) != 0) { + if (mkdirp(mountpoint, 0755) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to create mountpoint")); + return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + mountpoint)); + } + } + +#ifdef sun /* FreeBSD: overlay mounts are not checked. */ + /* + * Determine if the mountpoint is empty. If so, refuse to perform the + * mount. We don't perform this check if MS_OVERLAY is specified, which + * would defeat the point. We also avoid this check if 'remount' is + * specified. 
+ */ + if ((flags & MS_OVERLAY) == 0 && + strstr(mntopts, MNTOPT_REMOUNT) == NULL && + !dir_is_empty(mountpoint)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "directory is not empty")); + return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint)); + } +#endif + + /* perform the mount */ + if (zmount(zfs_get_name(zhp), mountpoint, flags, + MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) { + /* + * Generic errors are nasty, but there are just way too many + * from mount(), and they're well-understood. We pick a few + * common ones to improve upon. + */ + if (errno == EBUSY) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "mountpoint or dataset is busy")); + } else if (errno == EPERM) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Insufficient privileges")); + } else if (errno == ENOTSUP) { + char buf[256]; + int spa_version; + + VERIFY(zfs_spa_version(zhp, &spa_version) == 0); + (void) snprintf(buf, sizeof (buf), + dgettext(TEXT_DOMAIN, "Can't mount a version %lld " + "file system on a version %d pool. Pool must be" + " upgraded to mount this file system."), + (u_longlong_t)zfs_prop_get_int(zhp, + ZFS_PROP_VERSION), spa_version); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf)); + } else { + zfs_error_aux(hdl, strerror(errno)); + } + return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + zhp->zfs_name)); + } + + /* add the mounted entry into our cache */ + libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, + mntopts); + return (0); +} + +/* + * Unmount a single filesystem. + */ +static int +unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags) +{ + if (umount2(mountpoint, flags) != 0) { + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot unmount '%s'"), + mountpoint)); + } + + return (0); +} + +/* + * Unmount the given filesystem. 
+ */ +int +zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + struct mnttab entry; + char *mntpt = NULL; + + /* check to see if we need to unmount the filesystem */ + if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && + libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) { + /* + * mountpoint may have come from a call to + * getmnt/getmntany if it isn't NULL. If it is NULL, + * we know it comes from libzfs_mnttab_find which can + * then get freed later. We strdup it to play it safe. + */ + if (mountpoint == NULL) + mntpt = zfs_strdup(hdl, entry.mnt_mountp); + else + mntpt = zfs_strdup(hdl, mountpoint); + + /* + * Unshare and unmount the filesystem + */ + if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0) + return (-1); + + if (unmount_one(hdl, mntpt, flags) != 0) { + free(mntpt); + (void) zfs_shareall(zhp); + return (-1); + } + libzfs_mnttab_remove(hdl, zhp->zfs_name); + free(mntpt); + } + + return (0); +} + +/* + * Unmount this filesystem and any children inheriting the mountpoint property. + * To do this, just act like we're changing the mountpoint property, but don't + * remount the filesystems afterwards. + */ +int +zfs_unmountall(zfs_handle_t *zhp, int flags) +{ + prop_changelist_t *clp; + int ret; + + clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags); + if (clp == NULL) + return (-1); + + ret = changelist_prefix(clp); + changelist_free(clp); + + return (ret); +} + +boolean_t +zfs_is_shared(zfs_handle_t *zhp) +{ + zfs_share_type_t rc = 0; + zfs_share_proto_t *curr_proto; + + if (ZFS_IS_VOLUME(zhp)) + return (B_FALSE); + + for (curr_proto = share_all_proto; *curr_proto != PROTO_END; + curr_proto++) + rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto); + + return (rc ? 
B_TRUE : B_FALSE); +} + +int +zfs_share(zfs_handle_t *zhp) +{ + assert(!ZFS_IS_VOLUME(zhp)); + return (zfs_share_proto(zhp, share_all_proto)); +} + +int +zfs_unshare(zfs_handle_t *zhp) +{ + assert(!ZFS_IS_VOLUME(zhp)); + return (zfs_unshareall(zhp)); +} + +/* + * Check to see if the filesystem is currently shared. + */ +zfs_share_type_t +zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto) +{ + char *mountpoint; + zfs_share_type_t rc; + + if (!zfs_is_mounted(zhp, &mountpoint)) + return (SHARED_NOT_SHARED); + + if (rc = is_shared(zhp->zfs_hdl, mountpoint, proto)) { + if (where != NULL) + *where = mountpoint; + else + free(mountpoint); + return (rc); + } else { + free(mountpoint); + return (SHARED_NOT_SHARED); + } +} + +boolean_t +zfs_is_shared_nfs(zfs_handle_t *zhp, char **where) +{ + return (zfs_is_shared_proto(zhp, where, + PROTO_NFS) != SHARED_NOT_SHARED); +} + +boolean_t +zfs_is_shared_smb(zfs_handle_t *zhp, char **where) +{ + return (zfs_is_shared_proto(zhp, where, + PROTO_SMB) != SHARED_NOT_SHARED); +} + +/* + * Make sure things will work if libshare isn't installed by using + * wrapper functions that check to see that the pointers to functions + * initialized in _zfs_init_libshare() are actually present. 
+ */ + +#ifdef sun +static sa_handle_t (*_sa_init)(int); +static void (*_sa_fini)(sa_handle_t); +static sa_share_t (*_sa_find_share)(sa_handle_t, char *); +static int (*_sa_enable_share)(sa_share_t, char *); +static int (*_sa_disable_share)(sa_share_t, char *); +static char *(*_sa_errorstr)(int); +static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *); +static boolean_t (*_sa_needs_refresh)(sa_handle_t *); +static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t); +static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t, + char *, char *, zprop_source_t, char *, char *, char *); +static void (*_sa_update_sharetab_ts)(sa_handle_t); +#endif + +/* + * _zfs_init_libshare() + * + * Find the libshare.so.1 entry points that we use here and save the + * values to be used later. This is triggered by the runtime loader. + * Make sure the correct ISA version is loaded. + */ + +#pragma init(_zfs_init_libshare) +static void +_zfs_init_libshare(void) +{ +#ifdef sun + void *libshare; + char path[MAXPATHLEN]; + char isa[MAXISALEN]; + +#if defined(_LP64) + if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1) + isa[0] = '\0'; +#else + isa[0] = '\0'; +#endif + (void) snprintf(path, MAXPATHLEN, + "/usr/lib/%s/libshare.so.1", isa); + + if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) { + _sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init"); + _sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini"); + _sa_find_share = (sa_share_t (*)(sa_handle_t, char *)) + dlsym(libshare, "sa_find_share"); + _sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare, + "sa_enable_share"); + _sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare, + "sa_disable_share"); + _sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr"); + _sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *)) + dlsym(libshare, "sa_parse_legacy_options"); + _sa_needs_refresh = (boolean_t (*)(sa_handle_t *)) + dlsym(libshare, 
"sa_needs_refresh"); + _sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t)) + dlsym(libshare, "sa_get_zfs_handle"); + _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t, + sa_share_t, char *, char *, zprop_source_t, char *, + char *, char *))dlsym(libshare, "sa_zfs_process_share"); + _sa_update_sharetab_ts = (void (*)(sa_handle_t)) + dlsym(libshare, "sa_update_sharetab_ts"); + if (_sa_init == NULL || _sa_fini == NULL || + _sa_find_share == NULL || _sa_enable_share == NULL || + _sa_disable_share == NULL || _sa_errorstr == NULL || + _sa_parse_legacy_options == NULL || + _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL || + _sa_zfs_process_share == NULL || + _sa_update_sharetab_ts == NULL) { + _sa_init = NULL; + _sa_fini = NULL; + _sa_disable_share = NULL; + _sa_enable_share = NULL; + _sa_errorstr = NULL; + _sa_parse_legacy_options = NULL; + (void) dlclose(libshare); + _sa_needs_refresh = NULL; + _sa_get_zfs_handle = NULL; + _sa_zfs_process_share = NULL; + _sa_update_sharetab_ts = NULL; + } + } +#endif +} + +/* + * zfs_init_libshare(zhandle, service) + * + * Initialize the libshare API if it hasn't already been initialized. + * In all cases it returns 0 if it succeeded and an error if not. The + * service value is which part(s) of the API to initialize and is a + * direct map to the libshare sa_init(service) interface. + */ +int +zfs_init_libshare(libzfs_handle_t *zhandle, int service) +{ + int ret = SA_OK; + +#ifdef sun + if (_sa_init == NULL) + ret = SA_CONFIG_ERR; + + if (ret == SA_OK && zhandle->libzfs_shareflags & ZFSSHARE_MISS) { + /* + * We had a cache miss. Most likely it is a new ZFS + * dataset that was just created. We want to make sure + * so check timestamps to see if a different process + * has updated any of the configuration. If there was + * some non-ZFS change, we need to re-initialize the + * internal cache. 
+ */ + zhandle->libzfs_shareflags &= ~ZFSSHARE_MISS; + if (_sa_needs_refresh != NULL && + _sa_needs_refresh(zhandle->libzfs_sharehdl)) { + zfs_uninit_libshare(zhandle); + zhandle->libzfs_sharehdl = _sa_init(service); + } + } + + if (ret == SA_OK && zhandle && zhandle->libzfs_sharehdl == NULL) + zhandle->libzfs_sharehdl = _sa_init(service); + + if (ret == SA_OK && zhandle->libzfs_sharehdl == NULL) + ret = SA_NO_MEMORY; +#endif + + return (ret); +} + +/* + * zfs_uninit_libshare(zhandle) + * + * Uninitialize the libshare API if it hasn't already been + * uninitialized. It is OK to call multiple times. + */ +void +zfs_uninit_libshare(libzfs_handle_t *zhandle) +{ + if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) { +#ifdef sun + if (_sa_fini != NULL) + _sa_fini(zhandle->libzfs_sharehdl); +#endif + zhandle->libzfs_sharehdl = NULL; + } +} + +/* + * zfs_parse_options(options, proto) + * + * Call the legacy parse interface to get the protocol specific + * options using the NULL arg to indicate that this is a "parse" only. + */ +int +zfs_parse_options(char *options, zfs_share_proto_t proto) +{ +#ifdef sun + if (_sa_parse_legacy_options != NULL) { + return (_sa_parse_legacy_options(NULL, options, + proto_table[proto].p_name)); + } + return (SA_CONFIG_ERR); +#else + return (SA_OK); +#endif +} + +#ifdef sun +/* + * zfs_sa_find_share(handle, path) + * + * wrapper around sa_find_share to find a share path in the + * configuration. + */ +static sa_share_t +zfs_sa_find_share(sa_handle_t handle, char *path) +{ + if (_sa_find_share != NULL) + return (_sa_find_share(handle, path)); + return (NULL); +} + +/* + * zfs_sa_enable_share(share, proto) + * + * Wrapper for sa_enable_share which enables a share for a specified + * protocol. 
+ */ +static int +zfs_sa_enable_share(sa_share_t share, char *proto) +{ + if (_sa_enable_share != NULL) + return (_sa_enable_share(share, proto)); + return (SA_CONFIG_ERR); +} + +/* + * zfs_sa_disable_share(share, proto) + * + * Wrapper for sa_enable_share which disables a share for a specified + * protocol. + */ +static int +zfs_sa_disable_share(sa_share_t share, char *proto) +{ + if (_sa_disable_share != NULL) + return (_sa_disable_share(share, proto)); + return (SA_CONFIG_ERR); +} +#endif /* sun */ + +/* + * Share the given filesystem according to the options in the specified + * protocol specific properties (sharenfs, sharesmb). We rely + * on "libshare" to the dirty work for us. + */ +static int +zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) +{ + char mountpoint[ZFS_MAXPROPLEN]; + char shareopts[ZFS_MAXPROPLEN]; + char sourcestr[ZFS_MAXPROPLEN]; + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_share_proto_t *curr_proto; + zprop_source_t sourcetype; + int error, ret; + + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) + return (0); + + for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) { + /* + * Return success if there are no share options. + */ + if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop, + shareopts, sizeof (shareopts), &sourcetype, sourcestr, + ZFS_MAXPROPLEN, B_FALSE) != 0 || + strcmp(shareopts, "off") == 0) + continue; + +#ifdef illumos + ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API); + if (ret != SA_OK) { + (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED, + dgettext(TEXT_DOMAIN, "cannot share '%s': %s"), + zfs_get_name(zhp), _sa_errorstr != NULL ? + _sa_errorstr(ret) : ""); + return (-1); + } +#endif + + /* + * If the 'zoned' property is set, then zfs_is_mountable() + * will have already bailed out if we are in the global zone. + * But local zones cannot be NFS servers, so we ignore it for + * local zones as well. 
+ */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) + continue; + +#ifdef sun + share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint); + if (share == NULL) { + /* + * This may be a new file system that was just + * created so isn't in the internal cache + * (second time through). Rather than + * reloading the entire configuration, we can + * assume ZFS has done the checking and it is + * safe to add this to the internal + * configuration. + */ + if (_sa_zfs_process_share(hdl->libzfs_sharehdl, + NULL, NULL, mountpoint, + proto_table[*curr_proto].p_name, sourcetype, + shareopts, sourcestr, zhp->zfs_name) != SA_OK) { + (void) zfs_error_fmt(hdl, + proto_table[*curr_proto].p_share_err, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp)); + return (-1); + } + hdl->libzfs_shareflags |= ZFSSHARE_MISS; + share = zfs_sa_find_share(hdl->libzfs_sharehdl, + mountpoint); + } + if (share != NULL) { + int err; + err = zfs_sa_enable_share(share, + proto_table[*curr_proto].p_name); + if (err != SA_OK) { + (void) zfs_error_fmt(hdl, + proto_table[*curr_proto].p_share_err, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp)); + return (-1); + } + } else +#else + if (*curr_proto != PROTO_NFS) { + fprintf(stderr, "Unsupported share protocol: %d.\n", + *curr_proto); + continue; + } + + if (strcmp(shareopts, "on") == 0) + error = fsshare(ZFS_EXPORTS_PATH, mountpoint, ""); + else + error = fsshare(ZFS_EXPORTS_PATH, mountpoint, shareopts); + if (error != 0) +#endif + { + (void) zfs_error_fmt(hdl, + proto_table[*curr_proto].p_share_err, + dgettext(TEXT_DOMAIN, "cannot share '%s'"), + zfs_get_name(zhp)); + return (-1); + } + + } + return (0); +} + + +int +zfs_share_nfs(zfs_handle_t *zhp) +{ + return (zfs_share_proto(zhp, nfs_only)); +} + +int +zfs_share_smb(zfs_handle_t *zhp) +{ + return (zfs_share_proto(zhp, smb_only)); +} + +int +zfs_shareall(zfs_handle_t *zhp) +{ + return (zfs_share_proto(zhp, share_all_proto)); +} + +/* + * Unshare a filesystem by 
mountpoint. + */ +static int +unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint, + zfs_share_proto_t proto) +{ +#ifdef sun + sa_share_t share; + int err; + char *mntpt; + /* + * Mountpoint could get trashed if libshare calls getmntany + * which it does during API initialization, so strdup the + * value. + */ + mntpt = zfs_strdup(hdl, mountpoint); + + /* make sure libshare initialized */ + if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) { + free(mntpt); /* don't need the copy anymore */ + return (zfs_error_fmt(hdl, EZFS_SHARENFSFAILED, + dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), + name, _sa_errorstr(err))); + } + + share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt); + free(mntpt); /* don't need the copy anymore */ + + if (share != NULL) { + err = zfs_sa_disable_share(share, proto_table[proto].p_name); + if (err != SA_OK) { + return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED, + dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), + name, _sa_errorstr(err))); + } + } else { + return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED, + dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"), + name)); + } +#else + char buf[MAXPATHLEN]; + FILE *fp; + int err; + + if (proto != PROTO_NFS) { + fprintf(stderr, "No SMB support in FreeBSD yet.\n"); + return (EOPNOTSUPP); + } + + err = fsunshare(ZFS_EXPORTS_PATH, mountpoint); + if (err != 0) { + zfs_error_aux(hdl, "%s", strerror(err)); + return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED, + dgettext(TEXT_DOMAIN, + "cannot unshare '%s'"), name)); + } +#endif + return (0); +} + +/* + * Unshare the given filesystem. 
+ */ +int +zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint, + zfs_share_proto_t *proto) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + struct mnttab entry; + char *mntpt = NULL; + + /* check to see if need to unmount the filesystem */ + rewind(zhp->zfs_hdl->libzfs_mnttab); + if (mountpoint != NULL) + mountpoint = mntpt = zfs_strdup(hdl, mountpoint); + + if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && + libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) { + zfs_share_proto_t *curr_proto; + + if (mountpoint == NULL) + mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp); + + for (curr_proto = proto; *curr_proto != PROTO_END; + curr_proto++) { + + if (is_shared(hdl, mntpt, *curr_proto) && + unshare_one(hdl, zhp->zfs_name, + mntpt, *curr_proto) != 0) { + if (mntpt != NULL) + free(mntpt); + return (-1); + } + } + } + if (mntpt != NULL) + free(mntpt); + + return (0); +} + +int +zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint) +{ + return (zfs_unshare_proto(zhp, mountpoint, nfs_only)); +} + +int +zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint) +{ + return (zfs_unshare_proto(zhp, mountpoint, smb_only)); +} + +/* + * Same as zfs_unmountall(), but for NFS and SMB unshares. 
+ */ +int +zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) +{ + prop_changelist_t *clp; + int ret; + + clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0); + if (clp == NULL) + return (-1); + + ret = changelist_unshare(clp, proto); + changelist_free(clp); + + return (ret); +} + +int +zfs_unshareall_nfs(zfs_handle_t *zhp) +{ + return (zfs_unshareall_proto(zhp, nfs_only)); +} + +int +zfs_unshareall_smb(zfs_handle_t *zhp) +{ + return (zfs_unshareall_proto(zhp, smb_only)); +} + +int +zfs_unshareall(zfs_handle_t *zhp) +{ + return (zfs_unshareall_proto(zhp, share_all_proto)); +} + +int +zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint) +{ + return (zfs_unshare_proto(zhp, mountpoint, share_all_proto)); +} + +/* + * Remove the mountpoint associated with the current dataset, if necessary. + * We only remove the underlying directory if: + * + * - The mountpoint is not 'none' or 'legacy' + * - The mountpoint is non-empty + * - The mountpoint is the default or inherited + * - The 'zoned' property is set, or we're in a local zone + * + * Any other directories we leave alone. + */ +void +remove_mountpoint(zfs_handle_t *zhp) +{ + char mountpoint[ZFS_MAXPROPLEN]; + zprop_source_t source; + + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), + &source)) + return; + + if (source == ZPROP_SRC_DEFAULT || + source == ZPROP_SRC_INHERITED) { + /* + * Try to remove the directory, silently ignoring any errors. + * The filesystem may have since been removed or moved around, + * and this error isn't really useful to the administrator in + * any way. + */ + (void) rmdir(mountpoint); + } +} + +void +libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp) +{ + if (cbp->cb_alloc == cbp->cb_used) { + size_t newsz; + void *ptr; + + newsz = cbp->cb_alloc ? 
cbp->cb_alloc * 2 : 64; + ptr = zfs_realloc(zhp->zfs_hdl, + cbp->cb_handles, cbp->cb_alloc * sizeof (void *), + newsz * sizeof (void *)); + cbp->cb_handles = ptr; + cbp->cb_alloc = newsz; + } + cbp->cb_handles[cbp->cb_used++] = zhp; +} + +static int +mount_cb(zfs_handle_t *zhp, void *data) +{ + get_all_cb_t *cbp = data; + + if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) { + zfs_close(zhp); + return (0); + } + + if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) { + zfs_close(zhp); + return (0); + } + + libzfs_add_handle(cbp, zhp); + if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) { + zfs_close(zhp); + return (-1); + } + return (0); +} + +int +libzfs_dataset_cmp(const void *a, const void *b) +{ + zfs_handle_t **za = (zfs_handle_t **)a; + zfs_handle_t **zb = (zfs_handle_t **)b; + char mounta[MAXPATHLEN]; + char mountb[MAXPATHLEN]; + boolean_t gota, gotb; + + if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0) + verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta, + sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); + if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0) + verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb, + sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); + + if (gota && gotb) + return (strcmp(mounta, mountb)); + + if (gota) + return (-1); + if (gotb) + return (1); + + return (strcmp(zfs_get_name(a), zfs_get_name(b))); +} + +/* + * Mount and share all datasets within the given pool. This assumes that no + * datasets within the pool are currently mounted. Because users can create + * complicated nested hierarchies of mountpoints, we first gather all the + * datasets and mountpoints within the pool, and sort them by mountpoint. Once + * we have the list of all filesystems, we iterate over them in order and mount + * and/or share each one. 
+ */ +#pragma weak zpool_mount_datasets = zpool_enable_datasets +int +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +{ + get_all_cb_t cb = { 0 }; + libzfs_handle_t *hdl = zhp->zpool_hdl; + zfs_handle_t *zfsp; + int i, ret = -1; + int *good; + + /* + * Gather all non-snap datasets within the pool. + */ + if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL) + goto out; + + libzfs_add_handle(&cb, zfsp); + if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0) + goto out; + /* + * Sort the datasets by mountpoint. + */ + qsort(cb.cb_handles, cb.cb_used, sizeof (void *), + libzfs_dataset_cmp); + + /* + * And mount all the datasets, keeping track of which ones + * succeeded or failed. + */ + if ((good = zfs_alloc(zhp->zpool_hdl, + cb.cb_used * sizeof (int))) == NULL) + goto out; + + ret = 0; + for (i = 0; i < cb.cb_used; i++) { + if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) + ret = -1; + else + good[i] = 1; + } + + /* + * Then share all the ones that need to be shared. This needs + * to be a separate pass in order to avoid excessive reloading + * of the configuration. Good should never be NULL since + * zfs_alloc is supposed to exit if memory isn't available. + */ + for (i = 0; i < cb.cb_used; i++) { + if (good[i] && zfs_share(cb.cb_handles[i]) != 0) + ret = -1; + } + + free(good); + +out: + for (i = 0; i < cb.cb_used; i++) + zfs_close(cb.cb_handles[i]); + free(cb.cb_handles); + + return (ret); +} + +static int +mountpoint_compare(const void *a, const void *b) +{ + const char *mounta = *((char **)a); + const char *mountb = *((char **)b); + + return (strcmp(mountb, mounta)); +} + +/* alias for 2002/240 */ +#pragma weak zpool_unmount_datasets = zpool_disable_datasets +/* + * Unshare and unmount all datasets within the given pool. 
We don't want to + * rely on traversing the DSL to discover the filesystems within the pool, + * because this may be expensive (if not all of them are mounted), and can fail + * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and + * gather all the filesystems that are currently mounted. + */ +int +zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) +{ + int used, alloc; + struct mnttab entry; + size_t namelen; + char **mountpoints = NULL; + zfs_handle_t **datasets = NULL; + libzfs_handle_t *hdl = zhp->zpool_hdl; + int i; + int ret = -1; + int flags = (force ? MS_FORCE : 0); + + namelen = strlen(zhp->zpool_name); + + rewind(hdl->libzfs_mnttab); + used = alloc = 0; + while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { + /* + * Ignore non-ZFS entries. + */ + if (entry.mnt_fstype == NULL || + strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) + continue; + + /* + * Ignore filesystems not within this pool. + */ + if (entry.mnt_mountp == NULL || + strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 || + (entry.mnt_special[namelen] != '/' && + entry.mnt_special[namelen] != '\0')) + continue; + + /* + * At this point we've found a filesystem within our pool. Add + * it to our growing list. + */ + if (used == alloc) { + if (alloc == 0) { + if ((mountpoints = zfs_alloc(hdl, + 8 * sizeof (void *))) == NULL) + goto out; + + if ((datasets = zfs_alloc(hdl, + 8 * sizeof (void *))) == NULL) + goto out; + + alloc = 8; + } else { + void *ptr; + + if ((ptr = zfs_realloc(hdl, mountpoints, + alloc * sizeof (void *), + alloc * 2 * sizeof (void *))) == NULL) + goto out; + mountpoints = ptr; + + if ((ptr = zfs_realloc(hdl, datasets, + alloc * sizeof (void *), + alloc * 2 * sizeof (void *))) == NULL) + goto out; + datasets = ptr; + + alloc *= 2; + } + } + + if ((mountpoints[used] = zfs_strdup(hdl, + entry.mnt_mountp)) == NULL) + goto out; + + /* + * This is allowed to fail, in case there is some I/O error. 
It + * is only used to determine if we need to remove the underlying + * mountpoint, so failure is not fatal. + */ + datasets[used] = make_dataset_handle(hdl, entry.mnt_special); + + used++; + } + + /* + * At this point, we have the entire list of filesystems, so sort it by + * mountpoint. + */ + qsort(mountpoints, used, sizeof (char *), mountpoint_compare); + + /* + * Walk through and first unshare everything. + */ + for (i = 0; i < used; i++) { + zfs_share_proto_t *curr_proto; + for (curr_proto = share_all_proto; *curr_proto != PROTO_END; + curr_proto++) { + if (is_shared(hdl, mountpoints[i], *curr_proto) && + unshare_one(hdl, mountpoints[i], + mountpoints[i], *curr_proto) != 0) + goto out; + } + } + + /* + * Now unmount everything, removing the underlying directories as + * appropriate. + */ + for (i = 0; i < used; i++) { + if (unmount_one(hdl, mountpoints[i], flags) != 0) + goto out; + } + + for (i = 0; i < used; i++) { + if (datasets[i]) + remove_mountpoint(datasets[i]); + } + + ret = 0; +out: + for (i = 0; i < used; i++) { + if (datasets[i]) + zfs_close(datasets[i]); + free(mountpoints[i]); + } + free(datasets); + free(mountpoints); + + return (ret); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c new file mode 100644 index 0000000..e09417a --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -0,0 +1,4172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <ctype.h> +#include <errno.h> +#include <devid.h> +#include <fcntl.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <libgen.h> +#include <sys/zfs_ioctl.h> +#include <dlfcn.h> + +#include "zfs_namecheck.h" +#include "zfs_prop.h" +#include "libzfs_impl.h" +#include "zfs_comutil.h" +#include "zfeature_common.h" + +static int read_efi_label(nvlist_t *config, diskaddr_t *sb); + +#define DISK_ROOT "/dev/dsk" +#define RDISK_ROOT "/dev/rdsk" +#define BACKUP_SLICE "s2" + +typedef struct prop_flags { + int create:1; /* Validate property on creation */ + int import:1; /* Validate property on import */ +} prop_flags_t; + +/* + * ==================================================================== + * zpool property functions + * ==================================================================== + */ + +static int +zpool_get_all_props(zpool_handle_t *zhp) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + + if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) + return (-1); + + while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) { + if (errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + 
zcmd_free_nvlists(&zc); + return (-1); + } + } else { + zcmd_free_nvlists(&zc); + return (-1); + } + } + + if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + + zcmd_free_nvlists(&zc); + + return (0); +} + +static int +zpool_props_refresh(zpool_handle_t *zhp) +{ + nvlist_t *old_props; + + old_props = zhp->zpool_props; + + if (zpool_get_all_props(zhp) != 0) + return (-1); + + nvlist_free(old_props); + return (0); +} + +static char * +zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop, + zprop_source_t *src) +{ + nvlist_t *nv, *nvl; + uint64_t ival; + char *value; + zprop_source_t source; + + nvl = zhp->zpool_props; + if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0); + source = ival; + verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); + } else { + source = ZPROP_SRC_DEFAULT; + if ((value = (char *)zpool_prop_default_string(prop)) == NULL) + value = "-"; + } + + if (src) + *src = source; + + return (value); +} + +uint64_t +zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) +{ + nvlist_t *nv, *nvl; + uint64_t value; + zprop_source_t source; + + if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) { + /* + * zpool_get_all_props() has most likely failed because + * the pool is faulted, but if all we need is the top level + * vdev's guid then get it from the zhp config nvlist. 
+ */ + if ((prop == ZPOOL_PROP_GUID) && + (nvlist_lookup_nvlist(zhp->zpool_config, + ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) && + (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value) + == 0)) { + return (value); + } + return (zpool_prop_default_numeric(prop)); + } + + nvl = zhp->zpool_props; + if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { + verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0); + source = value; + verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); + } else { + source = ZPROP_SRC_DEFAULT; + value = zpool_prop_default_numeric(prop); + } + + if (src) + *src = source; + + return (value); +} + +/* + * Map VDEV STATE to printed strings. + */ +const char * +zpool_state_to_name(vdev_state_t state, vdev_aux_t aux) +{ + switch (state) { + case VDEV_STATE_CLOSED: + case VDEV_STATE_OFFLINE: + return (gettext("OFFLINE")); + case VDEV_STATE_REMOVED: + return (gettext("REMOVED")); + case VDEV_STATE_CANT_OPEN: + if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) + return (gettext("FAULTED")); + else if (aux == VDEV_AUX_SPLIT_POOL) + return (gettext("SPLIT")); + else + return (gettext("UNAVAIL")); + case VDEV_STATE_FAULTED: + return (gettext("FAULTED")); + case VDEV_STATE_DEGRADED: + return (gettext("DEGRADED")); + case VDEV_STATE_HEALTHY: + return (gettext("ONLINE")); + } + + return (gettext("UNKNOWN")); +} + +/* + * Map POOL STATE to printed strings. 
+ */ +const char * +zpool_pool_state_to_name(pool_state_t state) +{ + switch (state) { + case POOL_STATE_ACTIVE: + return (gettext("ACTIVE")); + case POOL_STATE_EXPORTED: + return (gettext("EXPORTED")); + case POOL_STATE_DESTROYED: + return (gettext("DESTROYED")); + case POOL_STATE_SPARE: + return (gettext("SPARE")); + case POOL_STATE_L2CACHE: + return (gettext("L2CACHE")); + case POOL_STATE_UNINITIALIZED: + return (gettext("UNINITIALIZED")); + case POOL_STATE_UNAVAIL: + return (gettext("UNAVAIL")); + case POOL_STATE_POTENTIALLY_ACTIVE: + return (gettext("POTENTIALLY_ACTIVE")); + } + + return (gettext("UNKNOWN")); +} + +/* + * Get a zpool property value for 'prop' and return the value in + * a pre-allocated buffer. + */ +int +zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, + zprop_source_t *srctype, boolean_t literal) +{ + uint64_t intval; + const char *strval; + zprop_source_t src = ZPROP_SRC_NONE; + nvlist_t *nvroot; + vdev_stat_t *vs; + uint_t vsc; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + switch (prop) { + case ZPOOL_PROP_NAME: + (void) strlcpy(buf, zpool_get_name(zhp), len); + break; + + case ZPOOL_PROP_HEALTH: + (void) strlcpy(buf, "FAULTED", len); + break; + + case ZPOOL_PROP_GUID: + intval = zpool_get_prop_int(zhp, prop, &src); + (void) snprintf(buf, len, "%llu", intval); + break; + + case ZPOOL_PROP_ALTROOT: + case ZPOOL_PROP_CACHEFILE: + case ZPOOL_PROP_COMMENT: + if (zhp->zpool_props != NULL || + zpool_get_all_props(zhp) == 0) { + (void) strlcpy(buf, + zpool_get_prop_string(zhp, prop, &src), + len); + break; + } + /* FALLTHROUGH */ + default: + (void) strlcpy(buf, "-", len); + break; + } + + if (srctype != NULL) + *srctype = src; + return (0); + } + + if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) && + prop != ZPOOL_PROP_NAME) + return (-1); + + switch (zpool_prop_get_type(prop)) { + case PROP_TYPE_STRING: + (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src), + len); + break; + + case 
PROP_TYPE_NUMBER: + intval = zpool_get_prop_int(zhp, prop, &src); + + switch (prop) { + case ZPOOL_PROP_SIZE: + case ZPOOL_PROP_ALLOCATED: + case ZPOOL_PROP_FREE: + case ZPOOL_PROP_FREEING: + case ZPOOL_PROP_LEAKED: + if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) zfs_nicenum(intval, buf, len); + } + break; + case ZPOOL_PROP_EXPANDSZ: + if (intval == 0) { + (void) strlcpy(buf, "-", len); + } else if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) zfs_nicenum(intval, buf, len); + } + break; + case ZPOOL_PROP_CAPACITY: + if (literal) { + (void) snprintf(buf, len, "%llu", + (u_longlong_t)intval); + } else { + (void) snprintf(buf, len, "%llu%%", + (u_longlong_t)intval); + } + break; + case ZPOOL_PROP_FRAGMENTATION: + if (intval == UINT64_MAX) { + (void) strlcpy(buf, "-", len); + } else { + (void) snprintf(buf, len, "%llu%%", + (u_longlong_t)intval); + } + break; + case ZPOOL_PROP_DEDUPRATIO: + (void) snprintf(buf, len, "%llu.%02llux", + (u_longlong_t)(intval / 100), + (u_longlong_t)(intval % 100)); + break; + case ZPOOL_PROP_HEALTH: + verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + verify(nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) + == 0); + + (void) strlcpy(buf, zpool_state_to_name(intval, + vs->vs_aux), len); + break; + case ZPOOL_PROP_VERSION: + if (intval >= SPA_VERSION_FEATURES) { + (void) snprintf(buf, len, "-"); + break; + } + /* FALLTHROUGH */ + default: + (void) snprintf(buf, len, "%llu", intval); + } + break; + + case PROP_TYPE_INDEX: + intval = zpool_get_prop_int(zhp, prop, &src); + if (zpool_prop_index_to_string(prop, intval, &strval) + != 0) + return (-1); + (void) strlcpy(buf, strval, len); + break; + + default: + abort(); + } + + if (srctype) + *srctype = src; + + return (0); +} + +/* + * Check if the bootfs name has the same pool name as it is set to. 
+ * Assuming bootfs is a valid dataset name. + */ +static boolean_t +bootfs_name_valid(const char *pool, char *bootfs) +{ + int len = strlen(pool); + + if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT)) + return (B_FALSE); + + if (strncmp(pool, bootfs, len) == 0 && + (bootfs[len] == '/' || bootfs[len] == '\0')) + return (B_TRUE); + + return (B_FALSE); +} + +/* + * Inspect the configuration to determine if any of the devices contain + * an EFI label. + */ +static boolean_t +pool_uses_efi(nvlist_t *config) +{ +#ifdef sun + nvlist_t **child; + uint_t c, children; + + if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return (read_efi_label(config, NULL) >= 0); + + for (c = 0; c < children; c++) { + if (pool_uses_efi(child[c])) + return (B_TRUE); + } +#endif /* sun */ + return (B_FALSE); +} + +boolean_t +zpool_is_bootable(zpool_handle_t *zhp) +{ + char bootfs[ZPOOL_MAXNAMELEN]; + + return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs, + sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-", + sizeof (bootfs)) != 0); +} + + +/* + * Given an nvlist of zpool properties to be set, validate that they are + * correct, and parse any numeric properties (index, boolean, etc) if they are + * specified as strings. 
+ */ +static nvlist_t * +zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, + nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf) +{ + nvpair_t *elem; + nvlist_t *retprops; + zpool_prop_t prop; + char *strval; + uint64_t intval; + char *slash, *check; + struct stat64 statbuf; + zpool_handle_t *zhp; + nvlist_t *nvroot; + + if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) { + (void) no_memory(hdl); + return (NULL); + } + + elem = NULL; + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + const char *propname = nvpair_name(elem); + + prop = zpool_name_to_prop(propname); + if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) { + int err; + char *fname = strchr(propname, '@') + 1; + + err = zfeature_lookup_name(fname, NULL); + if (err != 0) { + ASSERT3U(err, ==, ENOENT); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid feature '%s'"), fname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (nvpair_type(elem) != DATA_TYPE_STRING) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be a string"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + (void) nvpair_value_string(elem, &strval); + if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' can only be set to " + "'enabled'"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (nvlist_add_uint64(retprops, propname, 0) != 0) { + (void) no_memory(hdl); + goto error; + } + continue; + } + + /* + * Make sure this property is valid and applies to this type. 
+ */ + if (prop == ZPROP_INVAL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid property '%s'"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (zpool_prop_readonly(prop)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " + "is readonly"), propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); + goto error; + } + + if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops, + &strval, &intval, errbuf) != 0) + goto error; + + /* + * Perform additional checking for specific properties. + */ + switch (prop) { + case ZPOOL_PROP_VERSION: + if (intval < version || + !SPA_VERSION_IS_SUPPORTED(intval)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' number %d is invalid."), + propname, intval); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + goto error; + } + break; + + case ZPOOL_PROP_BOOTFS: + if (flags.create || flags.import) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' cannot be set at creation " + "or import time"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (version < SPA_VERSION_BOOTFS) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded to support " + "'%s' property"), propname); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + goto error; + } + + /* + * bootfs property value has to be a dataset name and + * the dataset has to be in the same pool as it sets to. 
+ */ + if (strval[0] != '\0' && !bootfs_name_valid(poolname, + strval)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " + "is an invalid name"), strval); + (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto error; + } + + if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "could not open pool '%s'"), poolname); + (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); + goto error; + } + verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + +#ifdef sun + /* + * bootfs property cannot be set on a disk which has + * been EFI labeled. + */ + if (pool_uses_efi(nvroot)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' not supported on " + "EFI labeled devices"), propname); + (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf); + zpool_close(zhp); + goto error; + } +#endif /* sun */ + zpool_close(zhp); + break; + + case ZPOOL_PROP_ALTROOT: + if (!flags.create && !flags.import) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' can only be set during pool " + "creation or import"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (strval[0] != '/') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "bad alternate root '%s'"), strval); + (void) zfs_error(hdl, EZFS_BADPATH, errbuf); + goto error; + } + break; + + case ZPOOL_PROP_CACHEFILE: + if (strval[0] == '\0') + break; + + if (strcmp(strval, "none") == 0) + break; + + if (strval[0] != '/') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' must be empty, an " + "absolute path, or 'none'"), propname); + (void) zfs_error(hdl, EZFS_BADPATH, errbuf); + goto error; + } + + slash = strrchr(strval, '/'); + + if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || + strcmp(slash, "/..") == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is not a valid file"), strval); + (void) zfs_error(hdl, EZFS_BADPATH, errbuf); + goto error; + } + + *slash = '\0'; + + if 
(strval[0] != '\0' && + (stat64(strval, &statbuf) != 0 || + !S_ISDIR(statbuf.st_mode))) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is not a valid directory"), + strval); + (void) zfs_error(hdl, EZFS_BADPATH, errbuf); + goto error; + } + + *slash = '/'; + break; + + case ZPOOL_PROP_COMMENT: + for (check = strval; *check != '\0'; check++) { + if (!isprint(*check)) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "comment may only have printable " + "characters")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + } + if (strlen(strval) > ZPROP_MAX_COMMENT) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "comment must not exceed %d characters"), + ZPROP_MAX_COMMENT); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + case ZPOOL_PROP_READONLY: + if (!flags.import) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' can only be set at " + "import time"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + } + } + + return (retprops); +error: + nvlist_free(retprops); + return (NULL); +} + +/* + * Set zpool property : propname=propval. 
+ */ +int +zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) +{ + zfs_cmd_t zc = { 0 }; + int ret = -1; + char errbuf[1024]; + nvlist_t *nvl = NULL; + nvlist_t *realprops; + uint64_t version; + prop_flags_t flags = { 0 }; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), + zhp->zpool_name); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + return (no_memory(zhp->zpool_hdl)); + + if (nvlist_add_string(nvl, propname, propval) != 0) { + nvlist_free(nvl); + return (no_memory(zhp->zpool_hdl)); + } + + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + if ((realprops = zpool_valid_proplist(zhp->zpool_hdl, + zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) { + nvlist_free(nvl); + return (-1); + } + + nvlist_free(nvl); + nvl = realprops; + + /* + * Execute the corresponding ioctl() to set this property. + */ + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + + if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) { + nvlist_free(nvl); + return (-1); + } + + ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc); + + zcmd_free_nvlists(&zc); + nvlist_free(nvl); + + if (ret) + (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf); + else + (void) zpool_props_refresh(zhp); + + return (ret); +} + +int +zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) +{ + libzfs_handle_t *hdl = zhp->zpool_hdl; + zprop_list_t *entry; + char buf[ZFS_MAXPROPLEN]; + nvlist_t *features = NULL; + zprop_list_t **last; + boolean_t firstexpand = (NULL == *plp); + + if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0) + return (-1); + + last = plp; + while (*last != NULL) + last = &(*last)->pl_next; + + if ((*plp)->pl_all) + features = zpool_get_features(zhp); + + if ((*plp)->pl_all && firstexpand) { + for (int i = 0; i < SPA_FEATURES; i++) { + zprop_list_t *entry = zfs_alloc(hdl, + sizeof (zprop_list_t)); + entry->pl_prop = ZPROP_INVAL; + 
entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s", + spa_feature_table[i].fi_uname); + entry->pl_width = strlen(entry->pl_user_prop); + entry->pl_all = B_TRUE; + + *last = entry; + last = &entry->pl_next; + } + } + + /* add any unsupported features */ + for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL); + nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) { + char *propname; + boolean_t found; + zprop_list_t *entry; + + if (zfeature_is_supported(nvpair_name(nvp))) + continue; + + propname = zfs_asprintf(hdl, "unsupported@%s", + nvpair_name(nvp)); + + /* + * Before adding the property to the list make sure that no + * other pool already added the same property. + */ + found = B_FALSE; + entry = *plp; + while (entry != NULL) { + if (entry->pl_user_prop != NULL && + strcmp(propname, entry->pl_user_prop) == 0) { + found = B_TRUE; + break; + } + entry = entry->pl_next; + } + if (found) { + free(propname); + continue; + } + + entry = zfs_alloc(hdl, sizeof (zprop_list_t)); + entry->pl_prop = ZPROP_INVAL; + entry->pl_user_prop = propname; + entry->pl_width = strlen(entry->pl_user_prop); + entry->pl_all = B_TRUE; + + *last = entry; + last = &entry->pl_next; + } + + for (entry = *plp; entry != NULL; entry = entry->pl_next) { + + if (entry->pl_fixed) + continue; + + if (entry->pl_prop != ZPROP_INVAL && + zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf), + NULL, B_FALSE) == 0) { + if (strlen(buf) > entry->pl_width) + entry->pl_width = strlen(buf); + } + } + + return (0); +} + +/* + * Get the state for the given feature on the given ZFS pool. 
+ */ +int +zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf, + size_t len) +{ + uint64_t refcount; + boolean_t found = B_FALSE; + nvlist_t *features = zpool_get_features(zhp); + boolean_t supported; + const char *feature = strchr(propname, '@') + 1; + + supported = zpool_prop_feature(propname); + ASSERT(supported || zpool_prop_unsupported(propname)); + + /* + * Convert from feature name to feature guid. This conversion is + * unecessary for unsupported@... properties because they already + * use guids. + */ + if (supported) { + int ret; + spa_feature_t fid; + + ret = zfeature_lookup_name(feature, &fid); + if (ret != 0) { + (void) strlcpy(buf, "-", len); + return (ENOTSUP); + } + feature = spa_feature_table[fid].fi_guid; + } + + if (nvlist_lookup_uint64(features, feature, &refcount) == 0) + found = B_TRUE; + + if (supported) { + if (!found) { + (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len); + } else { + if (refcount == 0) + (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len); + else + (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len); + } + } else { + if (found) { + if (refcount == 0) { + (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE); + } else { + (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY); + } + } else { + (void) strlcpy(buf, "-", len); + return (ENOTSUP); + } + } + + return (0); +} + +/* + * Don't start the slice at the default block of 34; many storage + * devices will use a stripe width of 128k, so start there instead. + */ +#define NEW_START_BLOCK 256 + +/* + * Validate the given pool name, optionally putting an extended error message in + * 'buf'. + */ +boolean_t +zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) +{ + namecheck_err_t why; + char what; + int ret; + + ret = pool_namecheck(pool, &why, &what); + + /* + * The rules for reserved pool names were extended at a later point. + * But we need to support users with existing pools that may now be + * invalid. 
So we only check for this expanded set of names during a + * create (or import), and only in userland. + */ + if (ret == 0 && !isopen && + (strncmp(pool, "mirror", 6) == 0 || + strncmp(pool, "raidz", 5) == 0 || + strncmp(pool, "spare", 5) == 0 || + strcmp(pool, "log") == 0)) { + if (hdl != NULL) + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "name is reserved")); + return (B_FALSE); + } + + + if (ret != 0) { + if (hdl != NULL) { + switch (why) { + case NAME_ERR_TOOLONG: + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "name is too long")); + break; + + case NAME_ERR_INVALCHAR: + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "invalid character " + "'%c' in pool name"), what); + break; + + case NAME_ERR_NOLETTER: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "name must begin with a letter")); + break; + + case NAME_ERR_RESERVED: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "name is reserved")); + break; + + case NAME_ERR_DISKLIKE: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool name is reserved")); + break; + + case NAME_ERR_LEADING_SLASH: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "leading slash in name")); + break; + + case NAME_ERR_EMPTY_COMPONENT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "empty component in name")); + break; + + case NAME_ERR_TRAILING_SLASH: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "trailing slash in name")); + break; + + case NAME_ERR_MULTIPLE_AT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "multiple '@' delimiters in name")); + break; + + } + } + return (B_FALSE); + } + + return (B_TRUE); +} + +/* + * Open a handle to the given pool, even if the pool is currently in the FAULTED + * state. + */ +zpool_handle_t * +zpool_open_canfail(libzfs_handle_t *hdl, const char *pool) +{ + zpool_handle_t *zhp; + boolean_t missing; + + /* + * Make sure the pool name is valid. 
+ */ + if (!zpool_name_valid(hdl, B_TRUE, pool)) { + (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), + pool); + return (NULL); + } + + if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) + return (NULL); + + zhp->zpool_hdl = hdl; + (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); + + if (zpool_refresh_stats(zhp, &missing) != 0) { + zpool_close(zhp); + return (NULL); + } + + if (missing) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool")); + (void) zfs_error_fmt(hdl, EZFS_NOENT, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool); + zpool_close(zhp); + return (NULL); + } + + return (zhp); +} + +/* + * Like the above, but silent on error. Used when iterating over pools (because + * the configuration cache may be out of date). + */ +int +zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret) +{ + zpool_handle_t *zhp; + boolean_t missing; + + if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) + return (-1); + + zhp->zpool_hdl = hdl; + (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); + + if (zpool_refresh_stats(zhp, &missing) != 0) { + zpool_close(zhp); + return (-1); + } + + if (missing) { + zpool_close(zhp); + *ret = NULL; + return (0); + } + + *ret = zhp; + return (0); +} + +/* + * Similar to zpool_open_canfail(), but refuses to open pools in the faulted + * state. + */ +zpool_handle_t * +zpool_open(libzfs_handle_t *hdl, const char *pool) +{ + zpool_handle_t *zhp; + + if ((zhp = zpool_open_canfail(hdl, pool)) == NULL) + return (NULL); + + if (zhp->zpool_state == POOL_STATE_UNAVAIL) { + (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, + dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name); + zpool_close(zhp); + return (NULL); + } + + return (zhp); +} + +/* + * Close the handle. Simply frees the memory associated with the handle. 
+ */ +void +zpool_close(zpool_handle_t *zhp) +{ + if (zhp->zpool_config) + nvlist_free(zhp->zpool_config); + if (zhp->zpool_old_config) + nvlist_free(zhp->zpool_old_config); + if (zhp->zpool_props) + nvlist_free(zhp->zpool_props); + free(zhp); +} + +/* + * Return the name of the pool. + */ +const char * +zpool_get_name(zpool_handle_t *zhp) +{ + return (zhp->zpool_name); +} + + +/* + * Return the state of the pool (ACTIVE or UNAVAILABLE) + */ +int +zpool_get_state(zpool_handle_t *zhp) +{ + return (zhp->zpool_state); +} + +/* + * Create the named pool, using the provided vdev list. It is assumed + * that the consumer has already validated the contents of the nvlist, so we + * don't have to worry about error semantics. + */ +int +zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, + nvlist_t *props, nvlist_t *fsprops) +{ + zfs_cmd_t zc = { 0 }; + nvlist_t *zc_fsprops = NULL; + nvlist_t *zc_props = NULL; + char msg[1024]; + int ret = -1; + + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot create '%s'"), pool); + + if (!zpool_name_valid(hdl, B_FALSE, pool)) + return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); + + if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) + return (-1); + + if (props) { + prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE }; + + if ((zc_props = zpool_valid_proplist(hdl, pool, props, + SPA_VERSION_1, flags, msg)) == NULL) { + goto create_failed; + } + } + + if (fsprops) { + uint64_t zoned; + char *zonestr; + + zoned = ((nvlist_lookup_string(fsprops, + zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) && + strcmp(zonestr, "on") == 0); + + if ((zc_fsprops = zfs_valid_proplist(hdl, + ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) { + goto create_failed; + } + if (!zc_props && + (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { + goto create_failed; + } + if (nvlist_add_nvlist(zc_props, + ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { + goto create_failed; + } + } + + if (zc_props && 
zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) + goto create_failed; + + (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); + + if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) { + + zcmd_free_nvlists(&zc); + nvlist_free(zc_props); + nvlist_free(zc_fsprops); + + switch (errno) { + case EBUSY: + /* + * This can happen if the user has specified the same + * device multiple times. We can't reliably detect this + * until we try to add it and see we already have a + * label. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more vdevs refer to the same device")); + return (zfs_error(hdl, EZFS_BADDEV, msg)); + + case EOVERFLOW: + /* + * This occurs when one of the devices is below + * SPA_MINDEVSIZE. Unfortunately, we can't detect which + * device was the problem device since there's no + * reliable way to determine device size from userland. + */ + { + char buf[64]; + + zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); + + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is less than the " + "minimum size (%s)"), buf); + } + return (zfs_error(hdl, EZFS_BADDEV, msg)); + + case ENOSPC: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is out of space")); + return (zfs_error(hdl, EZFS_BADDEV, msg)); + + case ENOTBLK: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cache device must be a disk or disk slice")); + return (zfs_error(hdl, EZFS_BADDEV, msg)); + + default: + return (zpool_standard_error(hdl, errno, msg)); + } + } + +create_failed: + zcmd_free_nvlists(&zc); + nvlist_free(zc_props); + nvlist_free(zc_fsprops); + return (ret); +} + +/* + * Destroy the given pool. It is up to the caller to ensure that there are no + * datasets left in the pool. 
+ */ +int +zpool_destroy(zpool_handle_t *zhp, const char *log_str) +{ + zfs_cmd_t zc = { 0 }; + zfs_handle_t *zfp = NULL; + libzfs_handle_t *hdl = zhp->zpool_hdl; + char msg[1024]; + + if (zhp->zpool_state == POOL_STATE_ACTIVE && + (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) + return (-1); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_history = (uint64_t)(uintptr_t)log_str; + + if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot destroy '%s'"), zhp->zpool_name); + + if (errno == EROFS) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is read only")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + } else { + (void) zpool_standard_error(hdl, errno, msg); + } + + if (zfp) + zfs_close(zfp); + return (-1); + } + + if (zfp) { + remove_mountpoint(zfp); + zfs_close(zfp); + } + + return (0); +} + +/* + * Add the given vdevs to the pool. The caller must have already performed the + * necessary verification to ensure that the vdev specification is well-formed. 
+ */ +int +zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) +{ + zfs_cmd_t zc = { 0 }; + int ret; + libzfs_handle_t *hdl = zhp->zpool_hdl; + char msg[1024]; + nvlist_t **spares, **l2cache; + uint_t nspares, nl2cache; + + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot add to '%s'"), zhp->zpool_name); + + if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < + SPA_VERSION_SPARES && + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, + &spares, &nspares) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " + "upgraded to add hot spares")); + return (zfs_error(hdl, EZFS_BADVERSION, msg)); + } + + if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot, + ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { + uint64_t s; + + for (s = 0; s < nspares; s++) { + char *path; + + if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, + &path) == 0 && pool_uses_efi(spares[s])) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "device '%s' contains an EFI label and " + "cannot be used on root pools."), + zpool_vdev_name(hdl, NULL, spares[s], + B_FALSE)); + return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); + } + } + } + + if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < + SPA_VERSION_L2CACHE && + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, + &l2cache, &nl2cache) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " + "upgraded to add cache devices")); + return (zfs_error(hdl, EZFS_BADVERSION, msg)); + } + + if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) + return (-1); + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) { + switch (errno) { + case EBUSY: + /* + * This can happen if the user has specified the same + * device multiple times. We can't reliably detect this + * until we try to add it and see we already have a + * label. 
+ */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more vdevs refer to the same device")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + case EOVERFLOW: + /* + * This occurrs when one of the devices is below + * SPA_MINDEVSIZE. Unfortunately, we can't detect which + * device was the problem device since there's no + * reliable way to determine device size from userland. + */ + { + char buf[64]; + + zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); + + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "device is less than the minimum " + "size (%s)"), buf); + } + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded to add these vdevs")); + (void) zfs_error(hdl, EZFS_BADVERSION, msg); + break; + + case EDOM: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "root pool can not have multiple vdevs" + " or separate logs")); + (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg); + break; + + case ENOTBLK: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cache device must be a disk or disk slice")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + default: + (void) zpool_standard_error(hdl, errno, msg); + } + + ret = -1; + } else { + ret = 0; + } + + zcmd_free_nvlists(&zc); + + return (ret); +} + +/* + * Exports the pool from the system. The caller must ensure that there are no + * mounted datasets in the pool. 
+ */ +static int +zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, + const char *log_str) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot export '%s'"), zhp->zpool_name); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_cookie = force; + zc.zc_guid = hardforce; + zc.zc_history = (uint64_t)(uintptr_t)log_str; + + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { + switch (errno) { + case EXDEV: + zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, + "use '-f' to override the following errors:\n" + "'%s' has an active shared spare which could be" + " used by other pools once '%s' is exported."), + zhp->zpool_name, zhp->zpool_name); + return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, + msg)); + default: + return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, + msg)); + } + } + + return (0); +} + +int +zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str) +{ + return (zpool_export_common(zhp, force, B_FALSE, log_str)); +} + +int +zpool_export_force(zpool_handle_t *zhp, const char *log_str) +{ + return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str)); +} + +static void +zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun, + nvlist_t *config) +{ + nvlist_t *nv = NULL; + uint64_t rewindto; + int64_t loss = -1; + struct tm t; + char timestr[128]; + + if (!hdl->libzfs_printerr || config == NULL) + return; + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || + nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) { + return; + } + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) + return; + (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); + + if (localtime_r((time_t *)&rewindto, &t) != NULL && + strftime(timestr, 128, 0, &t) != 0) { + if (dryrun) { + (void) printf(dgettext(TEXT_DOMAIN, + "Would be able to return %s " + 
"to its state as of %s.\n"), + name, timestr); + } else { + (void) printf(dgettext(TEXT_DOMAIN, + "Pool %s returned to its state as of %s.\n"), + name, timestr); + } + if (loss > 120) { + (void) printf(dgettext(TEXT_DOMAIN, + "%s approximately %lld "), + dryrun ? "Would discard" : "Discarded", + (loss + 30) / 60); + (void) printf(dgettext(TEXT_DOMAIN, + "minutes of transactions.\n")); + } else if (loss > 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "%s approximately %lld "), + dryrun ? "Would discard" : "Discarded", loss); + (void) printf(dgettext(TEXT_DOMAIN, + "seconds of transactions.\n")); + } + } +} + +void +zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, + nvlist_t *config) +{ + nvlist_t *nv = NULL; + int64_t loss = -1; + uint64_t edata = UINT64_MAX; + uint64_t rewindto; + struct tm t; + char timestr[128]; + + if (!hdl->libzfs_printerr) + return; + + if (reason >= 0) + (void) printf(dgettext(TEXT_DOMAIN, "action: ")); + else + (void) printf(dgettext(TEXT_DOMAIN, "\t")); + + /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */ + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || + nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 || + nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) + goto no_info; + + (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS, + &edata); + + (void) printf(dgettext(TEXT_DOMAIN, + "Recovery is possible, but will result in some data loss.\n")); + + if (localtime_r((time_t *)&rewindto, &t) != NULL && + strftime(timestr, 128, 0, &t) != 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "\tReturning the pool to its state as of %s\n" + "\tshould correct the problem. 
"), + timestr); + } else { + (void) printf(dgettext(TEXT_DOMAIN, + "\tReverting the pool to an earlier state " + "should correct the problem.\n\t")); + } + + if (loss > 120) { + (void) printf(dgettext(TEXT_DOMAIN, + "Approximately %lld minutes of data\n" + "\tmust be discarded, irreversibly. "), (loss + 30) / 60); + } else if (loss > 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "Approximately %lld seconds of data\n" + "\tmust be discarded, irreversibly. "), loss); + } + if (edata != 0 && edata != UINT64_MAX) { + if (edata == 1) { + (void) printf(dgettext(TEXT_DOMAIN, + "After rewind, at least\n" + "\tone persistent user-data error will remain. ")); + } else { + (void) printf(dgettext(TEXT_DOMAIN, + "After rewind, several\n" + "\tpersistent user-data errors will remain. ")); + } + } + (void) printf(dgettext(TEXT_DOMAIN, + "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "), + reason >= 0 ? "clear" : "import", name); + + (void) printf(dgettext(TEXT_DOMAIN, + "A scrub of the pool\n" + "\tis strongly recommended after recovery.\n")); + return; + +no_info: + (void) printf(dgettext(TEXT_DOMAIN, + "Destroy and re-create the pool from\n\ta backup source.\n")); +} + +/* + * zpool_import() is a contracted interface. Should be kept the same + * if possible. + * + * Applications should use zpool_import_props() to import a pool with + * new properties value to be set. 
+ */ +int +zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, + char *altroot) +{ + nvlist_t *props = NULL; + int ret; + + if (altroot != NULL) { + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { + return (zfs_error_fmt(hdl, EZFS_NOMEM, + dgettext(TEXT_DOMAIN, "cannot import '%s'"), + newname)); + } + + if (nvlist_add_string(props, + zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 || + nvlist_add_string(props, + zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) { + nvlist_free(props); + return (zfs_error_fmt(hdl, EZFS_NOMEM, + dgettext(TEXT_DOMAIN, "cannot import '%s'"), + newname)); + } + } + + ret = zpool_import_props(hdl, config, newname, props, + ZFS_IMPORT_NORMAL); + if (props) + nvlist_free(props); + return (ret); +} + +static void +print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv, + int indent) +{ + nvlist_t **child; + uint_t c, children; + char *vname; + uint64_t is_log = 0; + + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, + &is_log); + + if (name != NULL) + (void) printf("\t%*s%s%s\n", indent, "", name, + is_log ? 
" [log]" : ""); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE); + print_vdev_tree(hdl, vname, child[c], indent + 2); + free(vname); + } +} + +void +zpool_print_unsup_feat(nvlist_t *config) +{ + nvlist_t *nvinfo, *unsup_feat; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == + 0); + verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT, + &unsup_feat) == 0); + + for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(unsup_feat, nvp)) { + char *desc; + + verify(nvpair_type(nvp) == DATA_TYPE_STRING); + verify(nvpair_value_string(nvp, &desc) == 0); + + if (strlen(desc) > 0) + (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc); + else + (void) printf("\t%s\n", nvpair_name(nvp)); + } +} + +/* + * Import the given pool using the known configuration and a list of + * properties to be set. The configuration should have come from + * zpool_find_import(). The 'newname' parameters control whether the pool + * is imported with a different name. 
+ */ +int +zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, + nvlist_t *props, int flags) +{ + zfs_cmd_t zc = { 0 }; + zpool_rewind_policy_t policy; + nvlist_t *nv = NULL; + nvlist_t *nvinfo = NULL; + nvlist_t *missing = NULL; + char *thename; + char *origname; + int ret; + int error = 0; + char errbuf[1024]; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &origname) == 0); + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot import pool '%s'"), origname); + + if (newname != NULL) { + if (!zpool_name_valid(hdl, B_FALSE, newname)) + return (zfs_error_fmt(hdl, EZFS_INVALIDNAME, + dgettext(TEXT_DOMAIN, "cannot import '%s'"), + newname)); + thename = (char *)newname; + } else { + thename = origname; + } + + if (props) { + uint64_t version; + prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + if ((props = zpool_valid_proplist(hdl, origname, + props, version, flags, errbuf)) == NULL) { + return (-1); + } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { + nvlist_free(props); + return (-1); + } + } + + (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name)); + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &zc.zc_guid) == 0); + + if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) { + nvlist_free(props); + return (-1); + } + if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) { + nvlist_free(props); + return (-1); + } + + zc.zc_cookie = flags; + while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 && + errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + } + if (ret != 0) + error = errno; + + (void) zcmd_read_dst_nvlist(hdl, &zc, &nv); + zpool_get_rewind_policy(config, &policy); + + if (error) { + char desc[1024]; + + /* + * Dry-run failed, but we print out what success + * looks like if we found a 
best txg + */ + if (policy.zrp_request & ZPOOL_TRY_REWIND) { + zpool_rewind_exclaim(hdl, newname ? origname : thename, + B_TRUE, nv); + nvlist_free(nv); + return (-1); + } + + if (newname == NULL) + (void) snprintf(desc, sizeof (desc), + dgettext(TEXT_DOMAIN, "cannot import '%s'"), + thename); + else + (void) snprintf(desc, sizeof (desc), + dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"), + origname, thename); + + switch (error) { + case ENOTSUP: + if (nv != NULL && nvlist_lookup_nvlist(nv, + ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && + nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) { + (void) printf(dgettext(TEXT_DOMAIN, "This " + "pool uses the following feature(s) not " + "supported by this system:\n")); + zpool_print_unsup_feat(nv); + if (nvlist_exists(nvinfo, + ZPOOL_CONFIG_CAN_RDONLY)) { + (void) printf(dgettext(TEXT_DOMAIN, + "All unsupported features are only " + "required for writing to the pool." + "\nThe pool can be imported using " + "'-o readonly=on'.\n")); + } + } + /* + * Unsupported version. + */ + (void) zfs_error(hdl, EZFS_BADVERSION, desc); + break; + + case EINVAL: + (void) zfs_error(hdl, EZFS_INVALCONFIG, desc); + break; + + case EROFS: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is read only")); + (void) zfs_error(hdl, EZFS_BADDEV, desc); + break; + + case ENXIO: + if (nv && nvlist_lookup_nvlist(nv, + ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && + nvlist_lookup_nvlist(nvinfo, + ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "The devices below are missing, use " + "'-m' to import the pool anyway:\n")); + print_vdev_tree(hdl, NULL, missing, 2); + (void) printf("\n"); + } + (void) zpool_standard_error(hdl, error, desc); + break; + + case EEXIST: + (void) zpool_standard_error(hdl, error, desc); + break; + + default: + (void) zpool_standard_error(hdl, error, desc); + zpool_explain_recover(hdl, + newname ? 
origname : thename, -error, nv); + break; + } + + nvlist_free(nv); + ret = -1; + } else { + zpool_handle_t *zhp; + + /* + * This should never fail, but play it safe anyway. + */ + if (zpool_open_silent(hdl, thename, &zhp) != 0) + ret = -1; + else if (zhp != NULL) + zpool_close(zhp); + if (policy.zrp_request & + (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { + zpool_rewind_exclaim(hdl, newname ? origname : thename, + ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv); + } + nvlist_free(nv); + return (0); + } + + zcmd_free_nvlists(&zc); + nvlist_free(props); + + return (ret); +} + +/* + * Scan the pool. + */ +int +zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_cookie = func; + + if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || + (errno == ENOENT && func != POOL_SCAN_NONE)) + return (0); + + if (func == POOL_SCAN_SCRUB) { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); + } else if (func == POOL_SCAN_NONE) { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), + zc.zc_name); + } else { + assert(!"unexpected result"); + } + + if (errno == EBUSY) { + nvlist_t *nvroot; + pool_scan_stat_t *ps = NULL; + uint_t psc; + + verify(nvlist_lookup_nvlist(zhp->zpool_config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); + if (ps && ps->pss_func == POOL_SCAN_SCRUB) + return (zfs_error(hdl, EZFS_SCRUBBING, msg)); + else + return (zfs_error(hdl, EZFS_RESILVERING, msg)); + } else if (errno == ENOENT) { + return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); + } else { + return (zpool_standard_error(hdl, errno, msg)); + } +} + +/* + * This provides a very minimal check whether a given string is likely a + * c#t#d# style string. 
Users of this are expected to do their own + * verification of the s# part. + */ +#define CTD_CHECK(str) (str && str[0] == 'c' && isdigit(str[1])) + +/* + * More elaborate version for ones which may start with "/dev/dsk/" + * and the like. + */ +static int +ctd_check_path(char *str) { + /* + * If it starts with a slash, check the last component. + */ + if (str && str[0] == '/') { + char *tmp = strrchr(str, '/'); + + /* + * If it ends in "/old", check the second-to-last + * component of the string instead. + */ + if (tmp != str && strcmp(tmp, "/old") == 0) { + for (tmp--; *tmp != '/'; tmp--) + ; + } + str = tmp + 1; + } + return (CTD_CHECK(str)); +} + +/* + * Find a vdev that matches the search criteria specified. We use the + * the nvpair name to determine how we should look for the device. + * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL + * spare; but FALSE if its an INUSE spare. + */ +static nvlist_t * +vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, + boolean_t *l2cache, boolean_t *log) +{ + uint_t c, children; + nvlist_t **child; + nvlist_t *ret; + uint64_t is_log; + char *srchkey; + nvpair_t *pair = nvlist_next_nvpair(search, NULL); + + /* Nothing to look for */ + if (search == NULL || pair == NULL) + return (NULL); + + /* Obtain the key we will use to search */ + srchkey = nvpair_name(pair); + + switch (nvpair_type(pair)) { + case DATA_TYPE_UINT64: + if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { + uint64_t srchval, theguid; + + verify(nvpair_value_uint64(pair, &srchval) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &theguid) == 0); + if (theguid == srchval) + return (nv); + } + break; + + case DATA_TYPE_STRING: { + char *srchval, *val; + + verify(nvpair_value_string(pair, &srchval) == 0); + if (nvlist_lookup_string(nv, srchkey, &val) != 0) + break; + + /* + * Search for the requested value. Special cases: + * + * - ZPOOL_CONFIG_PATH for whole disk entries. These end in + * "s0" or "s0/old". 
The "s0" part is hidden from the user, + * but included in the string, so this matches around it. + * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). + * + * Otherwise, all other searches are simple string compares. + */ + if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && + ctd_check_path(val)) { + uint64_t wholedisk = 0; + + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk); + if (wholedisk) { + int slen = strlen(srchval); + int vlen = strlen(val); + + if (slen != vlen - 2) + break; + + /* + * make_leaf_vdev() should only set + * wholedisk for ZPOOL_CONFIG_PATHs which + * will include "/dev/dsk/", giving plenty of + * room for the indices used next. + */ + ASSERT(vlen >= 6); + + /* + * strings identical except trailing "s0" + */ + if (strcmp(&val[vlen - 2], "s0") == 0 && + strncmp(srchval, val, slen) == 0) + return (nv); + + /* + * strings identical except trailing "s0/old" + */ + if (strcmp(&val[vlen - 6], "s0/old") == 0 && + strcmp(&srchval[slen - 4], "/old") == 0 && + strncmp(srchval, val, slen - 4) == 0) + return (nv); + + break; + } + } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { + char *type, *idx, *end, *p; + uint64_t id, vdev_id; + + /* + * Determine our vdev type, keeping in mind + * that the srchval is composed of a type and + * vdev id pair (i.e. mirror-4). + */ + if ((type = strdup(srchval)) == NULL) + return (NULL); + + if ((p = strrchr(type, '-')) == NULL) { + free(type); + break; + } + idx = p + 1; + *p = '\0'; + + /* + * If the types don't match then keep looking. 
+ */ + if (strncmp(val, type, strlen(val)) != 0) { + free(type); + break; + } + + verify(strncmp(type, VDEV_TYPE_RAIDZ, + strlen(VDEV_TYPE_RAIDZ)) == 0 || + strncmp(type, VDEV_TYPE_MIRROR, + strlen(VDEV_TYPE_MIRROR)) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &id) == 0); + + errno = 0; + vdev_id = strtoull(idx, &end, 10); + + free(type); + if (errno != 0) + return (NULL); + + /* + * Now verify that we have the correct vdev id. + */ + if (vdev_id == id) + return (nv); + } + + /* + * Common case + */ + if (strcmp(srchval, val) == 0) + return (nv); + break; + } + + default: + break; + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return (NULL); + + for (c = 0; c < children; c++) { + if ((ret = vdev_to_nvlist_iter(child[c], search, + avail_spare, l2cache, NULL)) != NULL) { + /* + * The 'is_log' value is only set for the toplevel + * vdev, not the leaf vdevs. So we always lookup the + * log device from the root of the vdev tree (where + * 'log' is non-NULL). + */ + if (log != NULL && + nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_LOG, &is_log) == 0 && + is_log) { + *log = B_TRUE; + } + return (ret); + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) { + if ((ret = vdev_to_nvlist_iter(child[c], search, + avail_spare, l2cache, NULL)) != NULL) { + *avail_spare = B_TRUE; + return (ret); + } + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) { + for (c = 0; c < children; c++) { + if ((ret = vdev_to_nvlist_iter(child[c], search, + avail_spare, l2cache, NULL)) != NULL) { + *l2cache = B_TRUE; + return (ret); + } + } + } + + return (NULL); +} + +/* + * Given a physical path (minus the "/devices" prefix), find the + * associated vdev. 
+ */ +nvlist_t * +zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, + boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) +{ + nvlist_t *search, *nvroot, *ret; + + verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); + verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0); + + verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + *avail_spare = B_FALSE; + *l2cache = B_FALSE; + if (log != NULL) + *log = B_FALSE; + ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + nvlist_free(search); + + return (ret); +} + +/* + * Determine if we have an "interior" top-level vdev (i.e mirror/raidz). + */ +boolean_t +zpool_vdev_is_interior(const char *name) +{ + if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || + strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) + return (B_TRUE); + return (B_FALSE); +} + +nvlist_t * +zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, + boolean_t *l2cache, boolean_t *log) +{ + char buf[MAXPATHLEN]; + char *end; + nvlist_t *nvroot, *search, *ret; + uint64_t guid; + + verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + guid = strtoull(path, &end, 10); + if (guid != 0 && *end == '\0') { + verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); + } else if (zpool_vdev_is_interior(path)) { + verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); + } else if (path[0] != '/') { + (void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path); + verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); + } else { + verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); + } + + verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + *avail_spare = B_FALSE; + *l2cache = B_FALSE; + if (log != NULL) + *log = B_FALSE; + ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + nvlist_free(search); + 
+ return (ret); +} + +static int +vdev_online(nvlist_t *nv) +{ + uint64_t ival; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || + nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || + nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) + return (0); + + return (1); +} + +/* + * Helper function for zpool_get_physpaths(). + */ +static int +vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size, + size_t *bytes_written) +{ + size_t bytes_left, pos, rsz; + char *tmppath; + const char *format; + + if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH, + &tmppath) != 0) + return (EZFS_NODEVICE); + + pos = *bytes_written; + bytes_left = physpath_size - pos; + format = (pos == 0) ? "%s" : " %s"; + + rsz = snprintf(physpath + pos, bytes_left, format, tmppath); + *bytes_written += rsz; + + if (rsz >= bytes_left) { + /* if physpath was not copied properly, clear it */ + if (bytes_left != 0) { + physpath[pos] = 0; + } + return (EZFS_NOSPC); + } + return (0); +} + +static int +vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size, + size_t *rsz, boolean_t is_spare) +{ + char *type; + int ret; + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) + return (EZFS_INVALCONFIG); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + /* + * An active spare device has ZPOOL_CONFIG_IS_SPARE set. + * For a spare vdev, we only want to boot from the active + * spare device. 
+ */ + if (is_spare) { + uint64_t spare = 0; + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, + &spare); + if (!spare) + return (EZFS_INVALCONFIG); + } + + if (vdev_online(nv)) { + if ((ret = vdev_get_one_physpath(nv, physpath, + phypath_size, rsz)) != 0) + return (ret); + } + } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || + strcmp(type, VDEV_TYPE_REPLACING) == 0 || + (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { + nvlist_t **child; + uint_t count; + int i, ret; + + if (nvlist_lookup_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) + return (EZFS_INVALCONFIG); + + for (i = 0; i < count; i++) { + ret = vdev_get_physpaths(child[i], physpath, + phypath_size, rsz, is_spare); + if (ret == EZFS_NOSPC) + return (ret); + } + } + + return (EZFS_POOL_INVALARG); +} + +/* + * Get phys_path for a root pool config. + * Return 0 on success; non-zero on failure. + */ +static int +zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size) +{ + size_t rsz; + nvlist_t *vdev_root; + nvlist_t **child; + uint_t count; + char *type; + + rsz = 0; + + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &vdev_root) != 0) + return (EZFS_INVALCONFIG); + + if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 || + nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN, + &child, &count) != 0) + return (EZFS_INVALCONFIG); + + /* + * root pool can not have EFI labeled disks and can only have + * a single top-level vdev. + */ + if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 || + pool_uses_efi(vdev_root)) + return (EZFS_POOL_INVALARG); + + (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz, + B_FALSE); + + /* No online devices */ + if (rsz == 0) + return (EZFS_NODEVICE); + + return (0); +} + +/* + * Get phys_path for a root pool + * Return 0 on success; non-zero on failure. 
+ */ +int +zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) +{ + return (zpool_get_config_physpath(zhp->zpool_config, physpath, + phypath_size)); +} + +/* + * If the device has being dynamically expanded then we need to relabel + * the disk to use the new unallocated space. + */ +static int +zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) +{ +#ifdef sun + char path[MAXPATHLEN]; + char errbuf[1024]; + int fd, error; + int (*_efi_use_whole_disk)(int); + + if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT, + "efi_use_whole_disk")) == NULL) + return (-1); + + (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name); + + if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s': unable to open device"), name); + return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); + } + + /* + * It's possible that we might encounter an error if the device + * does not have any unallocated space left. If so, we simply + * ignore that error and continue on. + */ + error = _efi_use_whole_disk(fd); + (void) close(fd); + if (error && error != VT_ENOSPC) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s': unable to read disk capacity"), name); + return (zfs_error(hdl, EZFS_NOCAP, errbuf)); + } +#endif /* sun */ + return (0); +} + +/* + * Bring the specified vdev online. The 'flags' parameter is a set of the + * ZFS_ONLINE_* flags. 
+ */ +int +zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, + vdev_state_t *newstate) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tgt; + boolean_t avail_spare, l2cache, islog; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + if (flags & ZFS_ONLINE_EXPAND) { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot expand %s"), path); + } else { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot online %s"), path); + } + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, + &islog)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + + if (avail_spare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + if (flags & ZFS_ONLINE_EXPAND || + zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) { + char *pathname = NULL; + uint64_t wholedisk = 0; + + (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk); + verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, + &pathname) == 0); + + /* + * XXX - L2ARC 1.0 devices can't support expansion. + */ + if (l2cache) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot expand cache devices")); + return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg)); + } + + if (wholedisk) { + pathname += strlen(DISK_ROOT) + 1; + (void) zpool_relabel_disk(hdl, pathname); + } + } + + zc.zc_cookie = VDEV_STATE_ONLINE; + zc.zc_obj = flags; + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { + if (errno == EINVAL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split " + "from this pool into a new one. 
Use '%s' " + "instead"), "zpool detach"); + return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg)); + } + return (zpool_standard_error(hdl, errno, msg)); + } + + *newstate = zc.zc_cookie; + return (0); +} + +/* + * Take the specified vdev offline + */ +int +zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tgt; + boolean_t avail_spare, l2cache; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot offline %s"), path); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, + NULL)) == NULL) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + + if (avail_spare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + zc.zc_cookie = VDEV_STATE_OFFLINE; + zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0; + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + return (0); + + switch (errno) { + case EBUSY: + + /* + * There are no other replicas of this device. + */ + return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); + + case EEXIST: + /* + * The log device has unplayed logs + */ + return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg)); + + default: + return (zpool_standard_error(hdl, errno, msg)); + } +} + +/* + * Mark the given vdev faulted. 
+ */ +int +zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_guid = guid; + zc.zc_cookie = VDEV_STATE_FAULTED; + zc.zc_obj = aux; + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + return (0); + + switch (errno) { + case EBUSY: + + /* + * There are no other replicas of this device. + */ + return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); + + default: + return (zpool_standard_error(hdl, errno, msg)); + } + +} + +/* + * Mark the given vdev degraded. + */ +int +zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_guid = guid; + zc.zc_cookie = VDEV_STATE_DEGRADED; + zc.zc_obj = aux; + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); +} + +/* + * Returns TRUE if the given nvlist is a vdev that was originally swapped in as + * a hot spare. 
+ */ +static boolean_t +is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which) +{ + nvlist_t **child; + uint_t c, children; + char *type; + + if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child, + &children) == 0) { + verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, + &type) == 0); + + if (strcmp(type, VDEV_TYPE_SPARE) == 0 && + children == 2 && child[which] == tgt) + return (B_TRUE); + + for (c = 0; c < children; c++) + if (is_replacing_spare(child[c], tgt, which)) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Attach new_disk (fully described by nvroot) to old_disk. + * If 'replacing' is specified, the new disk will replace the old one. + */ +int +zpool_vdev_attach(zpool_handle_t *zhp, + const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + int ret; + nvlist_t *tgt; + boolean_t avail_spare, l2cache, islog; + uint64_t val; + char *newname; + nvlist_t **child; + uint_t children; + nvlist_t *config_root; + libzfs_handle_t *hdl = zhp->zpool_hdl; + boolean_t rootpool = zpool_is_bootable(zhp); + + if (replacing) + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot replace %s with %s"), old_disk, new_disk); + else + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot attach %s to %s"), new_disk, old_disk); + + /* + * If this is a root pool, make sure that we're not attaching an + * EFI labeled device. 
+ */ + if (rootpool && pool_uses_efi(nvroot)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "EFI labeled devices are not supported on root pools.")); + return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); + } + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache, + &islog)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + if (avail_spare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + if (l2cache) + return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + zc.zc_cookie = replacing; + + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0 || children != 1) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "new device must be a single disk")); + return (zfs_error(hdl, EZFS_INVALCONFIG, msg)); + } + + verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); + + if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL) + return (-1); + + /* + * If the target is a hot spare that has been swapped in, we can only + * replace it with another hot spare. + */ + if (replacing && + nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 && + (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache, + NULL) == NULL || !avail_spare) && + is_replacing_spare(config_root, tgt, 1)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "can only be replaced by another hot spare")); + free(newname); + return (zfs_error(hdl, EZFS_BADTARGET, msg)); + } + + free(newname); + + if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) + return (-1); + + ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc); + + zcmd_free_nvlists(&zc); + + if (ret == 0) { + if (rootpool) { + /* + * XXX need a better way to prevent user from + * booting up a half-baked vdev. 
+ */ + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make " + "sure to wait until resilver is done " + "before rebooting.\n")); + (void) fprintf(stderr, "\n"); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "If " + "you boot from pool '%s', you may need to update\n" + "boot code on newly attached disk '%s'.\n\n" + "Assuming you use GPT partitioning and 'da0' is " + "your new boot disk\n" + "you may use the following command:\n\n" + "\tgpart bootcode -b /boot/pmbr -p " + "/boot/gptzfsboot -i 1 da0\n\n"), + zhp->zpool_name, new_disk); + } + return (0); + } + + switch (errno) { + case ENOTSUP: + /* + * Can't attach to or replace this type of vdev. + */ + if (replacing) { + uint64_t version = zpool_get_prop_int(zhp, + ZPOOL_PROP_VERSION, NULL); + + if (islog) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot replace a log with a spare")); + else if (version >= SPA_VERSION_MULTI_REPLACE) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "already in replacing/spare config; wait " + "for completion or use 'zpool detach'")); + else + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot replace a replacing device")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "can only attach to mirrors and top-level " + "disks")); + } + (void) zfs_error(hdl, EZFS_BADTARGET, msg); + break; + + case EINVAL: + /* + * The new device must be a single disk. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "new device must be a single disk")); + (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); + break; + + case EBUSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"), + new_disk); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + case EOVERFLOW: + /* + * The new device is too small. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "device is too small")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + case EDOM: + /* + * The new device has a different alignment requirement. 
+ */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "devices have different sector alignment")); + (void) zfs_error(hdl, EZFS_BADDEV, msg); + break; + + case ENAMETOOLONG: + /* + * The resulting top-level vdev spec won't fit in the label. + */ + (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg); + break; + + default: + (void) zpool_standard_error(hdl, errno, msg); + } + + return (-1); +} + +/* + * Detach the specified device. + */ +int +zpool_vdev_detach(zpool_handle_t *zhp, const char *path) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tgt; + boolean_t avail_spare, l2cache; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot detach %s"), path); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, + NULL)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + if (avail_spare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + if (l2cache) + return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0) + return (0); + + switch (errno) { + + case ENOTSUP: + /* + * Can't detach from this type of vdev. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only " + "applicable to mirror and replacing vdevs")); + (void) zfs_error(hdl, EZFS_BADTARGET, msg); + break; + + case EBUSY: + /* + * There are no other replicas of this device. + */ + (void) zfs_error(hdl, EZFS_NOREPLICAS, msg); + break; + + default: + (void) zpool_standard_error(hdl, errno, msg); + } + + return (-1); +} + +/* + * Find a mirror vdev in the source nvlist. + * + * The mchild array contains a list of disks in one of the top-level mirrors + * of the source pool. The schild array contains a list of disks that the + * user specified on the command line. We loop over the mchild array to + * see if any entry in the schild array matches. 
+ * + * If a disk in the mchild array is found in the schild array, we return + * the index of that entry. Otherwise we return -1. + */ +static int +find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren, + nvlist_t **schild, uint_t schildren) +{ + uint_t mc; + + for (mc = 0; mc < mchildren; mc++) { + uint_t sc; + char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp, + mchild[mc], B_FALSE); + + for (sc = 0; sc < schildren; sc++) { + char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp, + schild[sc], B_FALSE); + boolean_t result = (strcmp(mpath, spath) == 0); + + free(spath); + if (result) { + free(mpath); + return (mc); + } + } + + free(mpath); + } + + return (-1); +} + +/* + * Split a mirror pool. If newroot points to null, then a new nvlist + * is generated and it is the responsibility of the caller to free it. + */ +int +zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, + nvlist_t *props, splitflags_t flags) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; + nvlist_t **varray = NULL, *zc_props = NULL; + uint_t c, children, newchildren, lastlog = 0, vcount, found = 0; + libzfs_handle_t *hdl = zhp->zpool_hdl; + uint64_t vers; + boolean_t freelist = B_FALSE, memory_err = B_TRUE; + int retval = 0; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name); + + if (!zpool_name_valid(hdl, B_FALSE, newname)) + return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); + + if ((config = zpool_get_config(zhp, NULL)) == NULL) { + (void) fprintf(stderr, gettext("Internal error: unable to " + "retrieve pool configuration\n")); + return (-1); + } + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) + == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0); + + if (props) { + prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; + if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name, + props, 
vers, flags, msg)) == NULL) + return (-1); + } + + if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, + &children) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Source pool is missing vdev tree")); + if (zc_props) + nvlist_free(zc_props); + return (-1); + } + + varray = zfs_alloc(hdl, children * sizeof (nvlist_t *)); + vcount = 0; + + if (*newroot == NULL || + nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, + &newchild, &newchildren) != 0) + newchildren = 0; + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE, is_hole = B_FALSE; + char *type; + nvlist_t **mchild, *vdev; + uint_t mchildren; + int entry; + + /* + * Unlike cache & spares, slogs are stored in the + * ZPOOL_CONFIG_CHILDREN array. We filter them out here. + */ + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, + &is_hole); + if (is_log || is_hole) { + /* + * Create a hole vdev and put it in the config. + */ + if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0) + goto out; + if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) != 0) + goto out; + if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE, + 1) != 0) + goto out; + if (lastlog == 0) + lastlog = vcount; + varray[vcount++] = vdev; + continue; + } + lastlog = 0; + verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) + == 0); + if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Source pool must be composed only of mirrors\n")); + retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); + goto out; + } + + verify(nvlist_lookup_nvlist_array(child[c], + ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); + + /* find or add an entry for this top-level vdev */ + if (newchildren > 0 && + (entry = find_vdev_entry(zhp, mchild, mchildren, + newchild, newchildren)) >= 0) { + /* We found a disk that the user specified. 
*/ + vdev = mchild[entry]; + ++found; + } else { + /* User didn't specify a disk for this vdev. */ + vdev = mchild[mchildren - 1]; + } + + if (nvlist_dup(vdev, &varray[vcount++], 0) != 0) + goto out; + } + + /* did we find every disk the user specified? */ + if (found != newchildren) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must " + "include at most one disk from each mirror")); + retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); + goto out; + } + + /* Prepare the nvlist for populating. */ + if (*newroot == NULL) { + if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0) + goto out; + freelist = B_TRUE; + if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) != 0) + goto out; + } else { + verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0); + } + + /* Add all the children we found */ + if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray, + lastlog == 0 ? vcount : lastlog) != 0) + goto out; + + /* + * If we're just doing a dry run, exit now with success. + */ + if (flags.dryrun) { + memory_err = B_FALSE; + freelist = B_FALSE; + goto out; + } + + /* now build up the config list & call the ioctl */ + if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0) + goto out; + + if (nvlist_add_nvlist(newconfig, + ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 || + nvlist_add_string(newconfig, + ZPOOL_CONFIG_POOL_NAME, newname) != 0 || + nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0) + goto out; + + /* + * The new pool is automatically part of the namespace unless we + * explicitly export it. 
+ */ + if (!flags.import) + zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT; + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string)); + if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0) + goto out; + if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) + goto out; + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) { + retval = zpool_standard_error(hdl, errno, msg); + goto out; + } + + freelist = B_FALSE; + memory_err = B_FALSE; + +out: + if (varray != NULL) { + int v; + + for (v = 0; v < vcount; v++) + nvlist_free(varray[v]); + free(varray); + } + zcmd_free_nvlists(&zc); + if (zc_props) + nvlist_free(zc_props); + if (newconfig) + nvlist_free(newconfig); + if (freelist) { + nvlist_free(*newroot); + *newroot = NULL; + } + + if (retval != 0) + return (retval); + + if (memory_err) + return (no_memory(hdl)); + + return (0); +} + +/* + * Remove the given device. Currently, this is supported only for hot spares + * and level 2 cache devices. + */ +int +zpool_vdev_remove(zpool_handle_t *zhp, const char *path) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tgt; + boolean_t avail_spare, l2cache, islog; + libzfs_handle_t *hdl = zhp->zpool_hdl; + uint64_t version; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot remove %s"), path); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, + &islog)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + /* + * XXX - this should just go away. 
+ */ + if (!avail_spare && !l2cache && !islog) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "only inactive hot spares, cache, top-level, " + "or log devices can be removed")); + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + } + + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + if (islog && version < SPA_VERSION_HOLES) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgrade to support log removal")); + return (zfs_error(hdl, EZFS_BADVERSION, msg)); + } + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); +} + +/* + * Clear the errors for the pool, or the particular device if specified. + */ +int +zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tgt; + zpool_rewind_policy_t policy; + boolean_t avail_spare, l2cache; + libzfs_handle_t *hdl = zhp->zpool_hdl; + nvlist_t *nvi = NULL; + int error; + + if (path) + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot clear errors for %s"), + path); + else + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot clear errors for %s"), + zhp->zpool_name); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if (path) { + if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, + &l2cache, NULL)) == 0) + return (zfs_error(hdl, EZFS_NODEVICE, msg)); + + /* + * Don't allow error clearing for hot spares. Do allow + * error clearing for l2cache devices. 
+ */ + if (avail_spare) + return (zfs_error(hdl, EZFS_ISSPARE, msg)); + + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, + &zc.zc_guid) == 0); + } + + zpool_get_rewind_policy(rewindnvl, &policy); + zc.zc_cookie = policy.zrp_request; + + if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0) + return (-1); + + if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0) + return (-1); + + while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 && + errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + zcmd_free_nvlists(&zc); + return (-1); + } + } + + if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) && + errno != EPERM && errno != EACCES)) { + if (policy.zrp_request & + (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { + (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); + zpool_rewind_exclaim(hdl, zc.zc_name, + ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), + nvi); + nvlist_free(nvi); + } + zcmd_free_nvlists(&zc); + return (0); + } + + zcmd_free_nvlists(&zc); + return (zpool_standard_error(hdl, errno, msg)); +} + +/* + * Similar to zpool_clear(), but takes a GUID (used by fmd). + */ +int +zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"), + guid); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_guid = guid; + zc.zc_cookie = ZPOOL_NO_REWIND; + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); +} + +/* + * Change the GUID for a pool. 
+ */ +int +zpool_reguid(zpool_handle_t *zhp) +{ + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + zfs_cmd_t zc = { 0 }; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0) + return (0); + + return (zpool_standard_error(hdl, errno, msg)); +} + +/* + * Reopen the pool. + */ +int +zpool_reopen(zpool_handle_t *zhp) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), + zhp->zpool_name); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0) + return (0); + return (zpool_standard_error(hdl, errno, msg)); +} + +/* + * Convert from a devid string to a path. + */ +static char * +devid_to_path(char *devid_str) +{ + ddi_devid_t devid; + char *minor; + char *path; + devid_nmlist_t *list = NULL; + int ret; + + if (devid_str_decode(devid_str, &devid, &minor) != 0) + return (NULL); + + ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list); + + devid_str_free(minor); + devid_free(devid); + + if (ret != 0) + return (NULL); + + if ((path = strdup(list[0].devname)) == NULL) + return (NULL); + + devid_free_nmlist(list); + + return (path); +} + +/* + * Convert from a path to a devid string. 
+ */ +static char * +path_to_devid(const char *path) +{ +#ifdef have_devid + int fd; + ddi_devid_t devid; + char *minor, *ret; + + if ((fd = open(path, O_RDONLY)) < 0) + return (NULL); + + minor = NULL; + ret = NULL; + if (devid_get(fd, &devid) == 0) { + if (devid_get_minor_name(fd, &minor) == 0) + ret = devid_str_encode(devid, minor); + if (minor != NULL) + devid_str_free(minor); + devid_free(devid); + } + (void) close(fd); + + return (ret); +#else + return (NULL); +#endif +} + +/* + * Issue the necessary ioctl() to update the stored path value for the vdev. We + * ignore any failure here, since a common case is for an unprivileged user to + * type 'zpool status', and we'll display the correct information anyway. + */ +static void +set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) +{ + zfs_cmd_t zc = { 0 }; + + (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value)); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &zc.zc_guid) == 0); + + (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc); +} + +/* + * Given a vdev, return the name to display in iostat. If the vdev has a path, + * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type. + * We also check if this is a whole disk, in which case we strip off the + * trailing 's0' slice name. + * + * This routine is also responsible for identifying when disks have been + * reconfigured in a new location. The kernel will have opened the device by + * devid, but the path will still refer to the old location. To catch this, we + * first do a path -> devid translation (which is fast for the common case). If + * the devid matches, we're done. If not, we do a reverse devid -> path + * translation and issue the appropriate ioctl() to update the path of the vdev. + * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any + * of these checks. 
+ */ +char * +zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, + boolean_t verbose) +{ + char *path, *devid; + uint64_t value; + char buf[64]; + vdev_stat_t *vs; + uint_t vsc; + int have_stats; + int have_path; + + have_stats = nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0; + have_path = nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0; + + /* + * If the device is not currently present, assume it will not + * come back at the same device path. Display the device by GUID. + */ + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 || + have_path && have_stats && vs->vs_state <= VDEV_STATE_CANT_OPEN) { + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &value) == 0); + (void) snprintf(buf, sizeof (buf), "%llu", + (u_longlong_t)value); + path = buf; + } else if (have_path) { + + /* + * If the device is dead (faulted, offline, etc) then don't + * bother opening it. Otherwise we may be forcing the user to + * open a misbehaving device, which can have undesirable + * effects. + */ + if ((have_stats == 0 || + vs->vs_state >= VDEV_STATE_DEGRADED) && + zhp != NULL && + nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) { + /* + * Determine if the current path is correct. + */ + char *newdevid = path_to_devid(path); + + if (newdevid == NULL || + strcmp(devid, newdevid) != 0) { + char *newpath; + + if ((newpath = devid_to_path(devid)) != NULL) { + /* + * Update the path appropriately. 
+ */ + set_path(zhp, nv, newpath); + if (nvlist_add_string(nv, + ZPOOL_CONFIG_PATH, newpath) == 0) + verify(nvlist_lookup_string(nv, + ZPOOL_CONFIG_PATH, + &path) == 0); + free(newpath); + } + } + + if (newdevid) + devid_str_free(newdevid); + } + +#ifdef sun + if (strncmp(path, "/dev/dsk/", 9) == 0) + path += 9; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &value) == 0 && value) { + int pathlen = strlen(path); + char *tmp = zfs_strdup(hdl, path); + + /* + * If it starts with c#, and ends with "s0", chop + * the "s0" off, or if it ends with "s0/old", remove + * the "s0" from the middle. + */ + if (CTD_CHECK(tmp)) { + if (strcmp(&tmp[pathlen - 2], "s0") == 0) { + tmp[pathlen - 2] = '\0'; + } else if (pathlen > 6 && + strcmp(&tmp[pathlen - 6], "s0/old") == 0) { + (void) strcpy(&tmp[pathlen - 6], + "/old"); + } + } + return (tmp); + } +#else /* !sun */ + if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) + path += sizeof(_PATH_DEV) - 1; +#endif /* !sun */ + } else { + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0); + + /* + * If it's a raidz device, we need to stick in the parity level. + */ + if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) { + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, + &value) == 0); + (void) snprintf(buf, sizeof (buf), "%s%llu", path, + (u_longlong_t)value); + path = buf; + } + + /* + * We identify each top-level vdev by using a <type-id> + * naming convention. + */ + if (verbose) { + uint64_t id; + + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &id) == 0); + (void) snprintf(buf, sizeof (buf), "%s-%llu", path, + (u_longlong_t)id); + path = buf; + } + } + + return (zfs_strdup(hdl, path)); +} + +static int +zbookmark_compare(const void *a, const void *b) +{ + return (memcmp(a, b, sizeof (zbookmark_phys_t))); +} + +/* + * Retrieve the persistent error log, uniquify the members, and return to the + * caller. 
+ */ +int +zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) +{ + zfs_cmd_t zc = { 0 }; + uint64_t count; + zbookmark_phys_t *zb = NULL; + int i; + + /* + * Retrieve the raw error list from the kernel. If the number of errors + * has increased, allocate more space and continue until we get the + * entire list. + */ + verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT, + &count) == 0); + if (count == 0) + return (0); + if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, + count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL) + return (-1); + zc.zc_nvlist_dst_size = count; + (void) strcpy(zc.zc_name, zhp->zpool_name); + for (;;) { + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG, + &zc) != 0) { + free((void *)(uintptr_t)zc.zc_nvlist_dst); + if (errno == ENOMEM) { + void *dst; + + count = zc.zc_nvlist_dst_size; + dst = zfs_alloc(zhp->zpool_hdl, count * + sizeof (zbookmark_phys_t)); + if (dst == NULL) + return (-1); + zc.zc_nvlist_dst = (uintptr_t)dst; + } else { + return (-1); + } + } else { + break; + } + } + + /* + * Sort the resulting bookmarks. This is a little confusing due to the + * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last + * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks + * _not_ copied as part of the process. So we point the start of our + * array appropriate and decrement the total number of elements. + */ + zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) + + zc.zc_nvlist_dst_size; + count -= zc.zc_nvlist_dst_size; + + qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare); + + verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); + + /* + * Fill in the nverrlistp with nvlist's of dataset and object numbers. 
+ */ + for (i = 0; i < count; i++) { + nvlist_t *nv; + + /* ignoring zb_blkid and zb_level for now */ + if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset && + zb[i-1].zb_object == zb[i].zb_object) + continue; + + if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0) + goto nomem; + if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, + zb[i].zb_objset) != 0) { + nvlist_free(nv); + goto nomem; + } + if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, + zb[i].zb_object) != 0) { + nvlist_free(nv); + goto nomem; + } + if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) { + nvlist_free(nv); + goto nomem; + } + nvlist_free(nv); + } + + free((void *)(uintptr_t)zc.zc_nvlist_dst); + return (0); + +nomem: + free((void *)(uintptr_t)zc.zc_nvlist_dst); + return (no_memory(zhp->zpool_hdl)); +} + +/* + * Upgrade a ZFS pool to the latest on-disk version. + */ +int +zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) strcpy(zc.zc_name, zhp->zpool_name); + zc.zc_cookie = new_version; + + if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0) + return (zpool_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"), + zhp->zpool_name)); + return (0); +} + +void +zfs_save_arguments(int argc, char **argv, char *string, int len) +{ + (void) strlcpy(string, basename(argv[0]), len); + for (int i = 1; i < argc; i++) { + (void) strlcat(string, " ", len); + (void) strlcat(string, argv[i], len); + } +} + +int +zpool_log_history(libzfs_handle_t *hdl, const char *message) +{ + zfs_cmd_t zc = { 0 }; + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "message", message); + err = zcmd_write_src_nvlist(hdl, &zc, args); + if (err == 0) + err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc); + nvlist_free(args); + zcmd_free_nvlists(&zc); + return (err); +} + +/* + * Perform ioctl to get some command history of a pool. + * + * 'buf' is the buffer to fill up to 'len' bytes. 
'off' is the + * logical offset of the history buffer to start reading from. + * + * Upon return, 'off' is the next logical offset to read from and + * 'len' is the actual amount of bytes read into 'buf'. + */ +static int +get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + + zc.zc_history = (uint64_t)(uintptr_t)buf; + zc.zc_history_len = *len; + zc.zc_history_offset = *off; + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) { + switch (errno) { + case EPERM: + return (zfs_error_fmt(hdl, EZFS_PERM, + dgettext(TEXT_DOMAIN, + "cannot show history for pool '%s'"), + zhp->zpool_name)); + case ENOENT: + return (zfs_error_fmt(hdl, EZFS_NOHISTORY, + dgettext(TEXT_DOMAIN, "cannot get history for pool " + "'%s'"), zhp->zpool_name)); + case ENOTSUP: + return (zfs_error_fmt(hdl, EZFS_BADVERSION, + dgettext(TEXT_DOMAIN, "cannot get history for pool " + "'%s', pool must be upgraded"), zhp->zpool_name)); + default: + return (zpool_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, + "cannot get history for '%s'"), zhp->zpool_name)); + } + } + + *len = zc.zc_history_len; + *off = zc.zc_history_offset; + + return (0); +} + +/* + * Process the buffer of nvlists, unpacking and storing each nvlist record + * into 'records'. 'leftover' is set to the number of bytes that weren't + * processed as there wasn't a complete record. 
+ */ +int +zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, + nvlist_t ***records, uint_t *numrecords) +{ + uint64_t reclen; + nvlist_t *nv; + int i; + + while (bytes_read > sizeof (reclen)) { + + /* get length of packed record (stored as little endian) */ + for (i = 0, reclen = 0; i < sizeof (reclen); i++) + reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i); + + if (bytes_read < sizeof (reclen) + reclen) + break; + + /* unpack record */ + if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0) + return (ENOMEM); + bytes_read -= sizeof (reclen) + reclen; + buf += sizeof (reclen) + reclen; + + /* add record to nvlist array */ + (*numrecords)++; + if (ISP2(*numrecords + 1)) { + *records = realloc(*records, + *numrecords * 2 * sizeof (nvlist_t *)); + } + (*records)[*numrecords - 1] = nv; + } + + *leftover = bytes_read; + return (0); +} + +/* from spa_history.c: spa_history_create_obj() */ +#define HIS_BUF_LEN_DEF (128 << 10) +#define HIS_BUF_LEN_MAX (1 << 30) + +/* + * Retrieve the command history of a pool. + */ +int +zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) +{ + char *buf = NULL; + uint64_t bufsize = HIS_BUF_LEN_DEF; + uint64_t off = 0; + nvlist_t **records = NULL; + uint_t numrecords = 0; + int err, i; + + if ((buf = malloc(bufsize)) == NULL) + return (ENOMEM); + do { + uint64_t bytes_read = bufsize; + uint64_t leftover; + + if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0) + break; + + /* if nothing else was read in, we're at EOF, just return */ + if (bytes_read == 0) + break; + + if ((err = zpool_history_unpack(buf, bytes_read, + &leftover, &records, &numrecords)) != 0) + break; + off -= leftover; + + /* + * If the history block is too big, double the buffer + * size and try again. 
+ */ + if (leftover == bytes_read) { + free(buf); + buf = NULL; + + bufsize <<= 1; + if ((bufsize >= HIS_BUF_LEN_MAX) || + ((buf = malloc(bufsize)) == NULL)) { + err = ENOMEM; + break; + } + } + + /* CONSTCOND */ + } while (1); + free(buf); + + if (!err) { + verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0); + verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD, + records, numrecords) == 0); + } + for (i = 0; i < numrecords; i++) + nvlist_free(records[i]); + free(records); + + return (err); +} + +void +zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, + char *pathname, size_t len) +{ + zfs_cmd_t zc = { 0 }; + boolean_t mounted = B_FALSE; + char *mntpnt = NULL; + char dsname[MAXNAMELEN]; + + if (dsobj == 0) { + /* special case for the MOS */ + (void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj); + return; + } + + /* get the dataset's name */ + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_obj = dsobj; + if (ioctl(zhp->zpool_hdl->libzfs_fd, + ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) { + /* just write out a path of two object numbers */ + (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>", + dsobj, obj); + return; + } + (void) strlcpy(dsname, zc.zc_value, sizeof (dsname)); + + /* find out if the dataset is mounted */ + mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt); + + /* get the corrupted object's path */ + (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); + zc.zc_obj = obj; + if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH, + &zc) == 0) { + if (mounted) { + (void) snprintf(pathname, len, "%s%s", mntpnt, + zc.zc_value); + } else { + (void) snprintf(pathname, len, "%s:%s", + dsname, zc.zc_value); + } + } else { + (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj); + } + free(mntpnt); +} + +#ifdef sun +/* + * Read the EFI label from the config, if a label does not exist then + * pass back the error to the caller. 
If the caller has passed a non-NULL + * diskaddr argument then we set it to the starting address of the EFI + * partition. + */ +static int +read_efi_label(nvlist_t *config, diskaddr_t *sb) +{ + char *path; + int fd; + char diskname[MAXPATHLEN]; + int err = -1; + + if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) + return (err); + + (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT, + strrchr(path, '/')); + if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) { + struct dk_gpt *vtoc; + + if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { + if (sb != NULL) + *sb = vtoc->efi_parts[0].p_start; + efi_free(vtoc); + } + (void) close(fd); + } + return (err); +} + +/* + * determine where a partition starts on a disk in the current + * configuration + */ +static diskaddr_t +find_start_block(nvlist_t *config) +{ + nvlist_t **child; + uint_t c, children; + diskaddr_t sb = MAXOFFSET_T; + uint64_t wholedisk; + + if (nvlist_lookup_nvlist_array(config, + ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { + if (nvlist_lookup_uint64(config, + ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) != 0 || !wholedisk) { + return (MAXOFFSET_T); + } + if (read_efi_label(config, &sb) < 0) + sb = MAXOFFSET_T; + return (sb); + } + + for (c = 0; c < children; c++) { + sb = find_start_block(child[c]); + if (sb != MAXOFFSET_T) { + return (sb); + } + } + return (MAXOFFSET_T); +} +#endif /* sun */ + +/* + * Label an individual disk. The name provided is the short name, + * stripped of any leading /dev path. 
+ */ +int +zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) +{ +#ifdef sun + char path[MAXPATHLEN]; + struct dk_gpt *vtoc; + int fd; + size_t resv = EFI_MIN_RESV_SIZE; + uint64_t slice_size; + diskaddr_t start_block; + char errbuf[1024]; + + /* prepare an error message just in case */ + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); + + if (zhp) { + nvlist_t *nvroot; + + if (zpool_is_bootable(zhp)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "EFI labeled devices are not supported on root " + "pools.")); + return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf)); + } + + verify(nvlist_lookup_nvlist(zhp->zpool_config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + + if (zhp->zpool_start_block == 0) + start_block = find_start_block(nvroot); + else + start_block = zhp->zpool_start_block; + zhp->zpool_start_block = start_block; + } else { + /* new pool */ + start_block = NEW_START_BLOCK; + } + + (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name, + BACKUP_SLICE); + + if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + /* + * This shouldn't happen. We've long since verified that this + * is a valid device. 
+ */ + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "unable to open device")); + return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); + } + + if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) { + /* + * The only way this can fail is if we run out of memory, or we + * were unable to read the disk's capacity + */ + if (errno == ENOMEM) + (void) no_memory(hdl); + + (void) close(fd); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unable to read disk capacity"), name); + + return (zfs_error(hdl, EZFS_NOCAP, errbuf)); + } + + slice_size = vtoc->efi_last_u_lba + 1; + slice_size -= EFI_MIN_RESV_SIZE; + if (start_block == MAXOFFSET_T) + start_block = NEW_START_BLOCK; + slice_size -= start_block; + + vtoc->efi_parts[0].p_start = start_block; + vtoc->efi_parts[0].p_size = slice_size; + + /* + * Why we use V_USR: V_BACKUP confuses users, and is considered + * disposable by some EFI utilities (since EFI doesn't have a backup + * slice). V_UNASSIGNED is supposed to be used only for zero size + * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, + * etc. were all pretty specific. V_USR is as close to reality as we + * can get, in the absence of V_OTHER. + */ + vtoc->efi_parts[0].p_tag = V_USR; + (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); + + vtoc->efi_parts[8].p_start = slice_size + start_block; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + + if (efi_write(fd, vtoc) != 0) { + /* + * Some block drivers (like pcata) may not support EFI + * GPT labels. Print out a helpful error message dir- + * ecting the user to manually label the disk and give + * a specific slice. 
+ */ + (void) close(fd); + efi_free(vtoc); + + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "try using fdisk(1M) and then provide a specific slice")); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + + (void) close(fd); + efi_free(vtoc); +#endif /* sun */ + return (0); +} + +static boolean_t +supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) +{ + char *type; + nvlist_t **child; + uint_t children, c; + + verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0); + if (strcmp(type, VDEV_TYPE_FILE) == 0 || + strcmp(type, VDEV_TYPE_HOLE) == 0 || + strcmp(type, VDEV_TYPE_MISSING) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "vdev type '%s' is not supported"), type); + (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf); + return (B_FALSE); + } + if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) { + if (!supported_dump_vdev_type(hdl, child[c], errbuf)) + return (B_FALSE); + } + } + return (B_TRUE); +} + +/* + * Check if this zvol is allowable for use as a dump device; zero if + * it is, > 0 if it isn't, < 0 if it isn't a zvol. + * + * Allowable storage configurations include mirrors, all raidz variants, and + * pools with log, cache, and spare devices. Pools which are backed by files or + * have missing/hole vdevs are not suitable. 
+ */ +int +zvol_check_dump_config(char *arg) +{ + zpool_handle_t *zhp = NULL; + nvlist_t *config, *nvroot; + char *p, *volname; + nvlist_t **top; + uint_t toplevels; + libzfs_handle_t *hdl; + char errbuf[1024]; + char poolname[ZPOOL_MAXNAMELEN]; + int pathlen = strlen(ZVOL_FULL_DEV_DIR); + int ret = 1; + + if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) { + return (-1); + } + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "dump is not supported on device '%s'"), arg); + + if ((hdl = libzfs_init()) == NULL) + return (1); + libzfs_print_on_error(hdl, B_TRUE); + + volname = arg + pathlen; + + /* check the configuration of the pool */ + if ((p = strchr(volname, '/')) == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "malformed dataset name")); + (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + return (1); + } else if (p - volname >= ZFS_MAXNAMELEN) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset name is too long")); + (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf); + return (1); + } else { + (void) strncpy(poolname, volname, p - volname); + poolname[p - volname] = '\0'; + } + + if ((zhp = zpool_open(hdl, poolname)) == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "could not open pool '%s'"), poolname); + (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); + goto out; + } + config = zpool_get_config(zhp, NULL); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "could not obtain vdev configuration for '%s'"), poolname); + (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf); + goto out; + } + + verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &top, &toplevels) == 0); + + if (!supported_dump_vdev_type(hdl, top[0], errbuf)) { + goto out; + } + ret = 0; + +out: + if (zhp) + zpool_close(zhp); + libzfs_fini(hdl); + return (ret); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c 
b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c new file mode 100644 index 0000000..91857b6 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c @@ -0,0 +1,3334 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>. + * All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. 
+ */ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <stddef.h> +#include <fcntl.h> +#include <sys/param.h> +#include <sys/mount.h> +#include <pthread.h> +#include <umem.h> +#include <time.h> + +#include <libzfs.h> +#include <libzfs_core.h> + +#include "zfs_namecheck.h" +#include "zfs_prop.h" +#include "zfs_fletcher.h" +#include "libzfs_impl.h" +#include <sha2.h> +#include <sys/zio_checksum.h> +#include <sys/ddt.h> + +#ifdef __FreeBSD__ +extern int zfs_ioctl_version; +#endif + +/* in libzfs_dataset.c */ +extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); +/* We need to use something for ENODATA. */ +#define ENODATA EIDRM + +static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *, + int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); + +static const zio_cksum_t zero_cksum = { 0 }; + +typedef struct dedup_arg { + int inputfd; + int outputfd; + libzfs_handle_t *dedup_hdl; +} dedup_arg_t; + +typedef struct progress_arg { + zfs_handle_t *pa_zhp; + int pa_fd; + boolean_t pa_parsable; +} progress_arg_t; + +typedef struct dataref { + uint64_t ref_guid; + uint64_t ref_object; + uint64_t ref_offset; +} dataref_t; + +typedef struct dedup_entry { + struct dedup_entry *dde_next; + zio_cksum_t dde_chksum; + uint64_t dde_prop; + dataref_t dde_ref; +} dedup_entry_t; + +#define MAX_DDT_PHYSMEM_PERCENT 20 +#define SMALLEST_POSSIBLE_MAX_DDT_MB 128 + +typedef struct dedup_table { + dedup_entry_t **dedup_hash_array; + umem_cache_t *ddecache; + uint64_t max_ddt_size; /* max dedup table size in bytes */ + uint64_t cur_ddt_size; /* current dedup table size in bytes */ + uint64_t ddt_count; + int numhashbits; + boolean_t ddt_full; +} dedup_table_t; + +static int +high_order_bit(uint64_t n) +{ + int count; + + for (count = 0; n != 0; count++) + n >>= 1; + return (count); +} + +static size_t 
+ssread(void *buf, size_t len, FILE *stream) +{ + size_t outlen; + + if ((outlen = fread(buf, len, 1, stream)) == 0) + return (0); + + return (outlen); +} + +static void +ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, + zio_cksum_t *cs, uint64_t prop, dataref_t *dr) +{ + dedup_entry_t *dde; + + if (ddt->cur_ddt_size >= ddt->max_ddt_size) { + if (ddt->ddt_full == B_FALSE) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Dedup table full. Deduplication will continue " + "with existing table entries")); + ddt->ddt_full = B_TRUE; + } + return; + } + + if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) + != NULL) { + assert(*ddepp == NULL); + dde->dde_next = NULL; + dde->dde_chksum = *cs; + dde->dde_prop = prop; + dde->dde_ref = *dr; + *ddepp = dde; + ddt->cur_ddt_size += sizeof (dedup_entry_t); + ddt->ddt_count++; + } +} + +/* + * Using the specified dedup table, do a lookup for an entry with + * the checksum cs. If found, return the block's reference info + * in *dr. Otherwise, insert a new entry in the dedup table, using + * the reference information specified by *dr. 
+ * + * return value: true - entry was found + * false - entry was not found + */ +static boolean_t +ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, + uint64_t prop, dataref_t *dr) +{ + uint32_t hashcode; + dedup_entry_t **ddepp; + + hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); + + for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; + ddepp = &((*ddepp)->dde_next)) { + if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && + (*ddepp)->dde_prop == prop) { + *dr = (*ddepp)->dde_ref; + return (B_TRUE); + } + } + ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); + return (B_FALSE); +} + +static int +cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) +{ + fletcher_4_incremental_native(buf, len, zc); + return (write(outfd, buf, len)); +} + +/* + * This function is started in a separate thread when the dedup option + * has been requested. The main send thread determines the list of + * snapshots to be included in the send stream and makes the ioctl calls + * for each one. But instead of having the ioctl send the output to the + * the output fd specified by the caller of zfs_send()), the + * ioctl is told to direct the output to a pipe, which is read by the + * alternate thread running THIS function. This function does the + * dedup'ing by: + * 1. building a dedup table (the DDT) + * 2. doing checksums on each data block and inserting a record in the DDT + * 3. looking for matching checksums, and + * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever + * a duplicate block is found. + * The output of this function then goes to the output fd requested + * by the caller of zfs_send(). 
+ */ +static void * +cksummer(void *arg) +{ + dedup_arg_t *dda = arg; + char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); + dmu_replay_record_t thedrr; + dmu_replay_record_t *drr = &thedrr; + struct drr_begin *drrb = &thedrr.drr_u.drr_begin; + struct drr_end *drre = &thedrr.drr_u.drr_end; + struct drr_object *drro = &thedrr.drr_u.drr_object; + struct drr_write *drrw = &thedrr.drr_u.drr_write; + struct drr_spill *drrs = &thedrr.drr_u.drr_spill; + struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + FILE *ofp; + int outfd; + dmu_replay_record_t wbr_drr = {0}; + struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; + dedup_table_t ddt; + zio_cksum_t stream_cksum; + uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); + uint64_t numbuckets; + + ddt.max_ddt_size = + MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, + SMALLEST_POSSIBLE_MAX_DDT_MB<<20); + + numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); + + /* + * numbuckets must be a power of 2. Increase number to + * a power of 2 if necessary. + */ + if (!ISP2(numbuckets)) + numbuckets = 1 << high_order_bit(numbuckets); + + ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); + ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, + NULL, NULL, NULL, NULL, NULL, 0); + ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); + ddt.numhashbits = high_order_bit(numbuckets) - 1; + ddt.ddt_full = B_FALSE; + + /* Initialize the write-by-reference block. 
*/ + wbr_drr.drr_type = DRR_WRITE_BYREF; + wbr_drr.drr_payloadlen = 0; + + outfd = dda->outputfd; + ofp = fdopen(dda->inputfd, "r"); + while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { + + switch (drr->drr_type) { + case DRR_BEGIN: + { + int fflags; + ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + + /* set the DEDUP feature flag for this stream */ + fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + fflags |= (DMU_BACKUP_FEATURE_DEDUP | + DMU_BACKUP_FEATURE_DEDUPPROPS); + DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); + + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { + int sz = drr->drr_payloadlen; + + if (sz > SPA_MAXBLOCKSIZE) { + buf = zfs_realloc(dda->dedup_hdl, buf, + SPA_MAXBLOCKSIZE, sz); + } + (void) ssread(buf, sz, ofp); + if (ferror(stdin)) + perror("fread"); + if (cksum_and_write(buf, sz, &stream_cksum, + outfd) == -1) + goto out; + } + break; + } + + case DRR_END: + { + /* use the recalculated checksum */ + ZIO_SET_CHECKSUM(&drre->drr_checksum, + stream_cksum.zc_word[0], stream_cksum.zc_word[1], + stream_cksum.zc_word[2], stream_cksum.zc_word[3]); + if ((write(outfd, drr, + sizeof (dmu_replay_record_t))) == -1) + goto out; + break; + } + + case DRR_OBJECT: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + if (drro->drr_bonuslen > 0) { + (void) ssread(buf, + P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + ofp); + if (cksum_and_write(buf, + P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + &stream_cksum, outfd) == -1) + goto out; + } + break; + } + + case DRR_SPILL: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + (void) ssread(buf, drrs->drr_length, ofp); + if (cksum_and_write(buf, drrs->drr_length, + &stream_cksum, outfd) == -1) + goto out; + break; + } + + case 
DRR_FREEOBJECTS: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + break; + } + + case DRR_WRITE: + { + dataref_t dataref; + + (void) ssread(buf, drrw->drr_length, ofp); + + /* + * Use the existing checksum if it's dedup-capable, + * else calculate a SHA256 checksum for it. + */ + + if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, + zero_cksum) || + !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { + SHA256_CTX ctx; + zio_cksum_t tmpsha256; + + SHA256Init(&ctx); + SHA256Update(&ctx, buf, drrw->drr_length); + SHA256Final(&tmpsha256, &ctx); + drrw->drr_key.ddk_cksum.zc_word[0] = + BE_64(tmpsha256.zc_word[0]); + drrw->drr_key.ddk_cksum.zc_word[1] = + BE_64(tmpsha256.zc_word[1]); + drrw->drr_key.ddk_cksum.zc_word[2] = + BE_64(tmpsha256.zc_word[2]); + drrw->drr_key.ddk_cksum.zc_word[3] = + BE_64(tmpsha256.zc_word[3]); + drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; + drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; + } + + dataref.ref_guid = drrw->drr_toguid; + dataref.ref_object = drrw->drr_object; + dataref.ref_offset = drrw->drr_offset; + + if (ddt_update(dda->dedup_hdl, &ddt, + &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, + &dataref)) { + /* block already present in stream */ + wbr_drrr->drr_object = drrw->drr_object; + wbr_drrr->drr_offset = drrw->drr_offset; + wbr_drrr->drr_length = drrw->drr_length; + wbr_drrr->drr_toguid = drrw->drr_toguid; + wbr_drrr->drr_refguid = dataref.ref_guid; + wbr_drrr->drr_refobject = + dataref.ref_object; + wbr_drrr->drr_refoffset = + dataref.ref_offset; + + wbr_drrr->drr_checksumtype = + drrw->drr_checksumtype; + wbr_drrr->drr_checksumflags = + drrw->drr_checksumtype; + wbr_drrr->drr_key.ddk_cksum = + drrw->drr_key.ddk_cksum; + wbr_drrr->drr_key.ddk_prop = + drrw->drr_key.ddk_prop; + + if (cksum_and_write(&wbr_drr, + sizeof (dmu_replay_record_t), &stream_cksum, + outfd) == -1) + goto out; + } else { + /* block not previously seen */ + if (cksum_and_write(drr, + sizeof 
(dmu_replay_record_t), &stream_cksum, + outfd) == -1) + goto out; + if (cksum_and_write(buf, + drrw->drr_length, + &stream_cksum, outfd) == -1) + goto out; + } + break; + } + + case DRR_WRITE_EMBEDDED: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + (void) ssread(buf, + P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp); + if (cksum_and_write(buf, + P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), + &stream_cksum, outfd) == -1) + goto out; + break; + } + + case DRR_FREE: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + break; + } + + default: + (void) printf("INVALID record type 0x%x\n", + drr->drr_type); + /* should never happen, so assert */ + assert(B_FALSE); + } + } +out: + umem_cache_destroy(ddt.ddecache); + free(ddt.dedup_hash_array); + free(buf); + (void) fclose(ofp); + + return (NULL); +} + +/* + * Routines for dealing with the AVL tree of fs-nvlists + */ +typedef struct fsavl_node { + avl_node_t fn_node; + nvlist_t *fn_nvfs; + char *fn_snapname; + uint64_t fn_guid; +} fsavl_node_t; + +static int +fsavl_compare(const void *arg1, const void *arg2) +{ + const fsavl_node_t *fn1 = arg1; + const fsavl_node_t *fn2 = arg2; + + if (fn1->fn_guid > fn2->fn_guid) + return (+1); + else if (fn1->fn_guid < fn2->fn_guid) + return (-1); + else + return (0); +} + +/* + * Given the GUID of a snapshot, find its containing filesystem and + * (optionally) name. 
+ */ +static nvlist_t * +fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) +{ + fsavl_node_t fn_find; + fsavl_node_t *fn; + + fn_find.fn_guid = snapguid; + + fn = avl_find(avl, &fn_find, NULL); + if (fn) { + if (snapname) + *snapname = fn->fn_snapname; + return (fn->fn_nvfs); + } + return (NULL); +} + +static void +fsavl_destroy(avl_tree_t *avl) +{ + fsavl_node_t *fn; + void *cookie; + + if (avl == NULL) + return; + + cookie = NULL; + while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) + free(fn); + avl_destroy(avl); + free(avl); +} + +/* + * Given an nvlist, produce an avl tree of snapshots, ordered by guid + */ +static avl_tree_t * +fsavl_create(nvlist_t *fss) +{ + avl_tree_t *fsavl; + nvpair_t *fselem = NULL; + + if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL) + return (NULL); + + avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), + offsetof(fsavl_node_t, fn_node)); + + while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { + nvlist_t *nvfs, *snaps; + nvpair_t *snapelem = NULL; + + VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); + VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); + + while ((snapelem = + nvlist_next_nvpair(snaps, snapelem)) != NULL) { + fsavl_node_t *fn; + uint64_t guid; + + VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); + if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) { + fsavl_destroy(fsavl); + return (NULL); + } + fn->fn_nvfs = nvfs; + fn->fn_snapname = nvpair_name(snapelem); + fn->fn_guid = guid; + + /* + * Note: if there are multiple snaps with the + * same GUID, we ignore all but one. + */ + if (avl_find(fsavl, fn, NULL) == NULL) + avl_add(fsavl, fn); + else + free(fn); + } + } + + return (fsavl); +} + +/* + * Routines for dealing with the giant nvlist of fs-nvlists, etc. 
+ */ +typedef struct send_data { + uint64_t parent_fromsnap_guid; + nvlist_t *parent_snaps; + nvlist_t *fss; + nvlist_t *snapprops; + const char *fromsnap; + const char *tosnap; + boolean_t recursive; + + /* + * The header nvlist is of the following format: + * { + * "tosnap" -> string + * "fromsnap" -> string (if incremental) + * "fss" -> { + * id -> { + * + * "name" -> string (full name; for debugging) + * "parentfromsnap" -> number (guid of fromsnap in parent) + * + * "props" -> { name -> value (only if set here) } + * "snaps" -> { name (lastname) -> number (guid) } + * "snapprops" -> { name (lastname) -> { name -> value } } + * + * "origin" -> number (guid) (if clone) + * "sent" -> boolean (not on-disk) + * } + * } + * } + * + */ +} send_data_t; + +static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv); + +static int +send_iterate_snap(zfs_handle_t *zhp, void *arg) +{ + send_data_t *sd = arg; + uint64_t guid = zhp->zfs_dmustats.dds_guid; + char *snapname; + nvlist_t *nv; + + snapname = strrchr(zhp->zfs_name, '@')+1; + + VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); + /* + * NB: if there is no fromsnap here (it's a newly created fs in + * an incremental replication), we will substitute the tosnap. 
+ */ + if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || + (sd->parent_fromsnap_guid == 0 && sd->tosnap && + strcmp(snapname, sd->tosnap) == 0)) { + sd->parent_fromsnap_guid = guid; + } + + VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); + send_iterate_prop(zhp, nv); + VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv)); + nvlist_free(nv); + + zfs_close(zhp); + return (0); +} + +static void +send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) +{ + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { + char *propname = nvpair_name(elem); + zfs_prop_t prop = zfs_name_to_prop(propname); + nvlist_t *propnv; + + if (!zfs_prop_user(propname)) { + /* + * Realistically, this should never happen. However, + * we want the ability to add DSL properties without + * needing to make incompatible version changes. We + * need to ignore unknown properties to allow older + * software to still send datasets containing these + * properties, with the unknown properties elided. + */ + if (prop == ZPROP_INVAL) + continue; + + if (zfs_prop_readonly(prop)) + continue; + } + + verify(nvpair_value_nvlist(elem, &propnv) == 0); + if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || + prop == ZFS_PROP_REFQUOTA || + prop == ZFS_PROP_REFRESERVATION) { + char *source; + uint64_t value; + verify(nvlist_lookup_uint64(propnv, + ZPROP_VALUE, &value) == 0); + if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) + continue; + /* + * May have no source before SPA_VERSION_RECVD_PROPS, + * but is still modifiable. 
+ */ + if (nvlist_lookup_string(propnv, + ZPROP_SOURCE, &source) == 0) { + if ((strcmp(source, zhp->zfs_name) != 0) && + (strcmp(source, + ZPROP_SOURCE_VAL_RECVD) != 0)) + continue; + } + } else { + char *source; + if (nvlist_lookup_string(propnv, + ZPROP_SOURCE, &source) != 0) + continue; + if ((strcmp(source, zhp->zfs_name) != 0) && + (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) + continue; + } + + if (zfs_prop_user(propname) || + zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + char *value; + verify(nvlist_lookup_string(propnv, + ZPROP_VALUE, &value) == 0); + VERIFY(0 == nvlist_add_string(nv, propname, value)); + } else { + uint64_t value; + verify(nvlist_lookup_uint64(propnv, + ZPROP_VALUE, &value) == 0); + VERIFY(0 == nvlist_add_uint64(nv, propname, value)); + } + } +} + +/* + * recursively generate nvlists describing datasets. See comment + * for the data structure send_data_t above for description of contents + * of the nvlist. + */ +static int +send_iterate_fs(zfs_handle_t *zhp, void *arg) +{ + send_data_t *sd = arg; + nvlist_t *nvfs, *nv; + int rv = 0; + uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; + uint64_t guid = zhp->zfs_dmustats.dds_guid; + char guidstring[64]; + + VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0)); + VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name)); + VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap", + sd->parent_fromsnap_guid)); + + if (zhp->zfs_dmustats.dds_origin[0]) { + zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, + zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); + if (origin == NULL) + return (-1); + VERIFY(0 == nvlist_add_uint64(nvfs, "origin", + origin->zfs_dmustats.dds_guid)); + } + + /* iterate over props */ + VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); + send_iterate_prop(zhp, nv); + VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); + nvlist_free(nv); + + /* iterate over snaps, and set sd->parent_fromsnap_guid */ + sd->parent_fromsnap_guid = 0; + VERIFY(0 == 
nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); + VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0)); + (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd); + VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); + VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops)); + nvlist_free(sd->parent_snaps); + nvlist_free(sd->snapprops); + + /* add this fs to nvlist */ + (void) snprintf(guidstring, sizeof (guidstring), + "0x%llx", (longlong_t)guid); + VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); + nvlist_free(nvfs); + + /* iterate over children */ + if (sd->recursive) + rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); + + sd->parent_fromsnap_guid = parent_fromsnap_guid_save; + + zfs_close(zhp); + return (rv); +} + +static int +gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, + const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp) +{ + zfs_handle_t *zhp; + send_data_t sd = { 0 }; + int error; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (EZFS_BADTYPE); + + VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); + sd.fromsnap = fromsnap; + sd.tosnap = tosnap; + sd.recursive = recursive; + + if ((error = send_iterate_fs(zhp, &sd)) != 0) { + nvlist_free(sd.fss); + if (avlp != NULL) + *avlp = NULL; + *nvlp = NULL; + return (error); + } + + if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) { + nvlist_free(sd.fss); + *nvlp = NULL; + return (EZFS_NOMEM); + } + + *nvlp = sd.fss; + return (0); +} + +/* + * Routines specific to "zfs send" + */ +typedef struct send_dump_data { + /* these are all just the short snapname (the part after the @) */ + const char *fromsnap; + const char *tosnap; + char prevsnap[ZFS_MAXNAMELEN]; + uint64_t prevsnap_obj; + boolean_t seenfrom, seento, replicate, doall, fromorigin; + boolean_t verbose, dryrun, parsable, progress, embed_data, large_block; + int outfd; + boolean_t err; 
+ nvlist_t *fss; + nvlist_t *snapholds; + avl_tree_t *fsavl; + snapfilter_cb_t *filter_cb; + void *filter_cb_arg; + nvlist_t *debugnv; + char holdtag[ZFS_MAXNAMELEN]; + int cleanup_fd; + uint64_t size; +} send_dump_data_t; + +static int +estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, + boolean_t fromorigin, uint64_t *sizep) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + assert(fromsnap_obj == 0 || !fromorigin); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_obj = fromorigin; + zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + zc.zc_fromobj = fromsnap_obj; + zc.zc_guid = 1; /* estimate flag */ + + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot estimate space for '%s'"), zhp->zfs_name); + + switch (errno) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + + case ENOENT: + if (zfs_dataset_exists(hdl, zc.zc_name, + ZFS_TYPE_SNAPSHOT)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source (@%s) does not exist"), + zc.zc_value); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + *sizep = zc.zc_objset_type; + + return (0); +} + +/* + * Dumps a backup of the given snapshot (incremental from fromsnap if it's not + * NULL) to the file descriptor specified by outfd. 
+ */ +static int +dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, + boolean_t fromorigin, int outfd, enum lzc_send_flags flags, + nvlist_t *debugnv) +{ + zfs_cmd_t zc = { 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + nvlist_t *thisdbg; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + assert(fromsnap_obj == 0 || !fromorigin); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_cookie = outfd; + zc.zc_obj = fromorigin; + zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + zc.zc_fromobj = fromsnap_obj; + zc.zc_flags = flags; + + VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); + if (fromsnap && fromsnap[0] != '\0') { + VERIFY(0 == nvlist_add_string(thisdbg, + "fromsnap", fromsnap)); + } + + if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + + VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); + if (debugnv) { + VERIFY(0 == nvlist_add_nvlist(debugnv, + zhp->zfs_name, thisdbg)); + } + nvlist_free(thisdbg); + + switch (errno) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + + case ENOENT: + if (zfs_dataset_exists(hdl, zc.zc_name, + ZFS_TYPE_SNAPSHOT)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source (@%s) does not exist"), + zc.zc_value); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: +#ifdef sun + case ENOSTR: +#endif + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + if (debugnv) + VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); + nvlist_free(thisdbg); + 
+ return (0); +} + +static void +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) +{ + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + + /* + * zfs_send() only sets snapholds for sends that need them, + * e.g. replication and doall. + */ + if (sdd->snapholds == NULL) + return; + + fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); +} + +static void * +send_progress_thread(void *arg) +{ + progress_arg_t *pa = arg; + + zfs_cmd_t zc = { 0 }; + zfs_handle_t *zhp = pa->pa_zhp; + libzfs_handle_t *hdl = zhp->zfs_hdl; + unsigned long long bytes; + char buf[16]; + + time_t t; + struct tm *tm; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (!pa->pa_parsable) + (void) fprintf(stderr, "TIME SENT SNAPSHOT\n"); + + /* + * Print the progress from ZFS_IOC_SEND_PROGRESS every second. + */ + for (;;) { + (void) sleep(1); + + zc.zc_cookie = pa->pa_fd; + if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) + return ((void *)-1); + + (void) time(&t); + tm = localtime(&t); + bytes = zc.zc_cookie; + + if (pa->pa_parsable) { + (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + bytes, zhp->zfs_name); + } else { + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + buf, zhp->zfs_name); + } + } +} + +static int +dump_snapshot(zfs_handle_t *zhp, void *arg) +{ + send_dump_data_t *sdd = arg; + progress_arg_t pa = { 0 }; + pthread_t tid; + char *thissnap; + int err; + boolean_t isfromsnap, istosnap, fromorigin; + boolean_t exclude = B_FALSE; + + err = 0; + thissnap = strchr(zhp->zfs_name, '@') + 1; + isfromsnap = (sdd->fromsnap != NULL && + strcmp(sdd->fromsnap, thissnap) == 0); + + if (!sdd->seenfrom && isfromsnap) { + gather_holds(zhp, sdd); + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + 
zfs_close(zhp); + return (0); + } + + if (sdd->seento || !sdd->seenfrom) { + zfs_close(zhp); + return (0); + } + + istosnap = (strcmp(sdd->tosnap, thissnap) == 0); + if (istosnap) + sdd->seento = B_TRUE; + + if (!sdd->doall && !isfromsnap && !istosnap) { + if (sdd->replicate) { + char *snapname; + nvlist_t *snapprops; + /* + * Filter out all intermediate snapshots except origin + * snapshots needed to replicate clones. + */ + nvlist_t *nvfs = fsavl_find(sdd->fsavl, + zhp->zfs_dmustats.dds_guid, &snapname); + + VERIFY(0 == nvlist_lookup_nvlist(nvfs, + "snapprops", &snapprops)); + VERIFY(0 == nvlist_lookup_nvlist(snapprops, + thissnap, &snapprops)); + exclude = !nvlist_exists(snapprops, "is_clone_origin"); + } else { + exclude = B_TRUE; + } + } + + /* + * If a filter function exists, call it to determine whether + * this snapshot will be sent. + */ + if (exclude || (sdd->filter_cb != NULL && + sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { + /* + * This snapshot is filtered out. Don't send it, and don't + * set prevsnap_obj, so it will be as if this snapshot didn't + * exist, and the next accepted snapshot will be sent as + * an incremental from the last accepted one, or as the + * first (and full) snapshot in the case of a replication, + * non-incremental send. 
+ */ + zfs_close(zhp); + return (0); + } + + gather_holds(zhp, sdd); + fromorigin = sdd->prevsnap[0] == '\0' && + (sdd->fromorigin || sdd->replicate); + + if (sdd->verbose) { + uint64_t size; + err = estimate_ioctl(zhp, sdd->prevsnap_obj, + fromorigin, &size); + + if (sdd->parsable) { + if (sdd->prevsnap[0] != '\0') { + (void) fprintf(stderr, "incremental\t%s\t%s", + sdd->prevsnap, zhp->zfs_name); + } else { + (void) fprintf(stderr, "full\t%s", + zhp->zfs_name); + } + } else { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "send from @%s to %s"), + sdd->prevsnap, zhp->zfs_name); + } + if (err == 0) { + if (sdd->parsable) { + (void) fprintf(stderr, "\t%llu\n", + (longlong_t)size); + } else { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + " estimated size is %s\n"), buf); + } + sdd->size += size; + } else { + (void) fprintf(stderr, "\n"); + } + } + + if (!sdd->dryrun) { + /* + * If progress reporting is requested, spawn a new thread to + * poll ZFS_IOC_SEND_PROGRESS at a regular interval. 
+ */ + if (sdd->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = sdd->outfd; + pa.pa_parsable = sdd->parsable; + + if (err = pthread_create(&tid, NULL, + send_progress_thread, &pa)) { + zfs_close(zhp); + return (err); + } + } + + enum lzc_send_flags flags = 0; + if (sdd->large_block) + flags |= LZC_SEND_FLAG_LARGE_BLOCK; + if (sdd->embed_data) + flags |= LZC_SEND_FLAG_EMBED_DATA; + + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, + fromorigin, sdd->outfd, flags, sdd->debugnv); + + if (sdd->progress) { + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + } + } + + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); + zfs_close(zhp); + return (err); +} + +static int +dump_filesystem(zfs_handle_t *zhp, void *arg) +{ + int rv = 0; + send_dump_data_t *sdd = arg; + boolean_t missingfrom = B_FALSE; + zfs_cmd_t zc = { 0 }; + + (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", + zhp->zfs_name, sdd->tosnap); + if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: could not send %s@%s: does not exist\n"), + zhp->zfs_name, sdd->tosnap); + sdd->err = B_TRUE; + return (0); + } + + if (sdd->replicate && sdd->fromsnap) { + /* + * If this fs does not have fromsnap, and we're doing + * recursive, we need to send a full stream from the + * beginning (or an incremental from the origin if this + * is a clone). If we're doing non-recursive, then let + * them get the error. 
+ */ + (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", + zhp->zfs_name, sdd->fromsnap); + if (ioctl(zhp->zfs_hdl->libzfs_fd, + ZFS_IOC_OBJSET_STATS, &zc) != 0) { + missingfrom = B_TRUE; + } + } + + sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; + sdd->prevsnap_obj = 0; + if (sdd->fromsnap == NULL || missingfrom) + sdd->seenfrom = B_TRUE; + + rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); + if (!sdd->seenfrom) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: could not send %s@%s:\n" + "incremental source (%s@%s) does not exist\n"), + zhp->zfs_name, sdd->tosnap, + zhp->zfs_name, sdd->fromsnap); + sdd->err = B_TRUE; + } else if (!sdd->seento) { + if (sdd->fromsnap) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: could not send %s@%s:\n" + "incremental source (%s@%s) " + "is not earlier than it\n"), + zhp->zfs_name, sdd->tosnap, + zhp->zfs_name, sdd->fromsnap); + } else { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "WARNING: " + "could not send %s@%s: does not exist\n"), + zhp->zfs_name, sdd->tosnap); + } + sdd->err = B_TRUE; + } + + return (rv); +} + +static int +dump_filesystems(zfs_handle_t *rzhp, void *arg) +{ + send_dump_data_t *sdd = arg; + nvpair_t *fspair; + boolean_t needagain, progress; + + if (!sdd->replicate) + return (dump_filesystem(rzhp, sdd)); + + /* Mark the clone origin snapshots. 
*/ + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *nvfs; + uint64_t origin_guid = 0; + + VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); + (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); + if (origin_guid != 0) { + char *snapname; + nvlist_t *origin_nv = fsavl_find(sdd->fsavl, + origin_guid, &snapname); + if (origin_nv != NULL) { + nvlist_t *snapprops; + VERIFY(0 == nvlist_lookup_nvlist(origin_nv, + "snapprops", &snapprops)); + VERIFY(0 == nvlist_lookup_nvlist(snapprops, + snapname, &snapprops)); + VERIFY(0 == nvlist_add_boolean( + snapprops, "is_clone_origin")); + } + } + } +again: + needagain = progress = B_FALSE; + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *fslist, *parent_nv; + char *fsname; + zfs_handle_t *zhp; + int err; + uint64_t origin_guid = 0; + uint64_t parent_guid = 0; + + VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); + if (nvlist_lookup_boolean(fslist, "sent") == 0) + continue; + + VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); + (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); + (void) nvlist_lookup_uint64(fslist, "parentfromsnap", + &parent_guid); + + if (parent_guid != 0) { + parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL); + if (!nvlist_exists(parent_nv, "sent")) { + /* parent has not been sent; skip this one */ + needagain = B_TRUE; + continue; + } + } + + if (origin_guid != 0) { + nvlist_t *origin_nv = fsavl_find(sdd->fsavl, + origin_guid, NULL); + if (origin_nv != NULL && + !nvlist_exists(origin_nv, "sent")) { + /* + * origin has not been sent yet; + * skip this clone. 
+ */ + needagain = B_TRUE; + continue; + } + } + + zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); + if (zhp == NULL) + return (-1); + err = dump_filesystem(zhp, sdd); + VERIFY(nvlist_add_boolean(fslist, "sent") == 0); + progress = B_TRUE; + zfs_close(zhp); + if (err) + return (err); + } + if (needagain) { + assert(progress); + goto again; + } + + /* clean out the sent flags in case we reuse this fss */ + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *fslist; + + VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); + (void) nvlist_remove_all(fslist, "sent"); + } + + return (0); +} + +/* + * Generate a send stream for the dataset identified by the argument zhp. + * + * The content of the send stream is the snapshot identified by + * 'tosnap'. Incremental streams are requested in two ways: + * - from the snapshot identified by "fromsnap" (if non-null) or + * - from the origin of the dataset identified by zhp, which must + * be a clone. In this case, "fromsnap" is null and "fromorigin" + * is TRUE. + * + * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and + * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) + * if "replicate" is set. If "doall" is set, dump all the intermediate + * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" + * case too. If "props" is set, send properties. 
+ */ +int +zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, + sendflags_t *flags, int outfd, snapfilter_cb_t filter_func, + void *cb_arg, nvlist_t **debugnvp) +{ + char errbuf[1024]; + send_dump_data_t sdd = { 0 }; + int err = 0; + nvlist_t *fss = NULL; + avl_tree_t *fsavl = NULL; + static uint64_t holdseq; + int spa_version; + pthread_t tid = 0; + int pipefd[2]; + dedup_arg_t dda = { 0 }; + int featureflags = 0; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot send '%s'"), zhp->zfs_name); + + if (fromsnap && fromsnap[0] == '\0') { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "zero-length incremental source")); + return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); + } + + if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { + uint64_t version; + version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); + if (version >= ZPL_VERSION_SA) { + featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; + } + } + + if (flags->dedup && !flags->dryrun) { + featureflags |= (DMU_BACKUP_FEATURE_DEDUP | + DMU_BACKUP_FEATURE_DEDUPPROPS); + if (err = pipe(pipefd)) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, + errbuf)); + } + dda.outputfd = outfd; + dda.inputfd = pipefd[1]; + dda.dedup_hdl = zhp->zfs_hdl; + if (err = pthread_create(&tid, NULL, cksummer, &dda)) { + (void) close(pipefd[0]); + (void) close(pipefd[1]); + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, + EZFS_THREADCREATEFAILED, errbuf)); + } + } + + if (flags->replicate || flags->doall || flags->props) { + dmu_replay_record_t drr = { 0 }; + char *packbuf = NULL; + size_t buflen = 0; + zio_cksum_t zc = { 0 }; + + if (flags->replicate || flags->props) { + nvlist_t *hdrnv; + + VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); + if (fromsnap) { + VERIFY(0 == nvlist_add_string(hdrnv, + "fromsnap", fromsnap)); + } + VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); + if (!flags->replicate) { + 
VERIFY(0 == nvlist_add_boolean(hdrnv, + "not_recursive")); + } + + err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, + fromsnap, tosnap, flags->replicate, &fss, &fsavl); + if (err) + goto err_out; + VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); + err = nvlist_pack(hdrnv, &packbuf, &buflen, + NV_ENCODE_XDR, 0); + if (debugnvp) + *debugnvp = hdrnv; + else + nvlist_free(hdrnv); + if (err) + goto stderr_out; + } + + if (!flags->dryrun) { + /* write first begin record */ + drr.drr_type = DRR_BEGIN; + drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; + DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. + drr_versioninfo, DMU_COMPOUNDSTREAM); + DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. + drr_versioninfo, featureflags); + (void) snprintf(drr.drr_u.drr_begin.drr_toname, + sizeof (drr.drr_u.drr_begin.drr_toname), + "%s@%s", zhp->zfs_name, tosnap); + drr.drr_payloadlen = buflen; + err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); + + /* write header nvlist */ + if (err != -1 && packbuf != NULL) { + err = cksum_and_write(packbuf, buflen, &zc, + outfd); + } + free(packbuf); + if (err == -1) { + err = errno; + goto stderr_out; + } + + /* write end record */ + bzero(&drr, sizeof (drr)); + drr.drr_type = DRR_END; + drr.drr_u.drr_end.drr_checksum = zc; + err = write(outfd, &drr, sizeof (drr)); + if (err == -1) { + err = errno; + goto stderr_out; + } + + err = 0; + } + } + + /* dump each stream */ + sdd.fromsnap = fromsnap; + sdd.tosnap = tosnap; + if (tid != 0) + sdd.outfd = pipefd[0]; + else + sdd.outfd = outfd; + sdd.replicate = flags->replicate; + sdd.doall = flags->doall; + sdd.fromorigin = flags->fromorigin; + sdd.fss = fss; + sdd.fsavl = fsavl; + sdd.verbose = flags->verbose; + sdd.parsable = flags->parsable; + sdd.progress = flags->progress; + sdd.dryrun = flags->dryrun; + sdd.large_block = flags->largeblock; + sdd.embed_data = flags->embed_data; + sdd.filter_cb = filter_func; + sdd.filter_cb_arg = cb_arg; + if (debugnvp) + sdd.debugnv = *debugnvp; + + /* + * Some flags 
require that we place user holds on the datasets that are + * being sent so they don't get destroyed during the send. We can skip + * this step if the pool is imported read-only since the datasets cannot + * be destroyed. + */ + if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp), + ZPOOL_PROP_READONLY, NULL) && + zfs_spa_version(zhp, &spa_version) == 0 && + spa_version >= SPA_VERSION_USERREFS && + (flags->doall || flags->replicate)) { + ++holdseq; + (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), + ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); + sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + if (sdd.cleanup_fd < 0) { + err = errno; + goto stderr_out; + } + sdd.snapholds = fnvlist_alloc(); + } else { + sdd.cleanup_fd = -1; + sdd.snapholds = NULL; + } + if (flags->verbose || sdd.snapholds != NULL) { + /* + * Do a verbose no-op dry run to get all the verbose output + * or to gather snapshot hold's before generating any data, + * then do a non-verbose real run to generate the streams. + */ + sdd.dryrun = B_TRUE; + err = dump_filesystems(zhp, &sdd); + + if (err != 0) + goto stderr_out; + + if (flags->verbose) { + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + } + + /* Ensure no snaps found is treated as an error. */ + if (!sdd.seento) { + err = ENOENT; + goto err_out; + } + + /* Skip the second run if dryrun was requested. */ + if (flags->dryrun) + goto err_out; + + if (sdd.snapholds != NULL) { + err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); + if (err != 0) + goto stderr_out; + + fnvlist_free(sdd.snapholds); + sdd.snapholds = NULL; + } + + sdd.dryrun = B_FALSE; + sdd.verbose = B_FALSE; + } + + err = dump_filesystems(zhp, &sdd); + fsavl_destroy(fsavl); + nvlist_free(fss); + + /* Ensure no snaps found is treated as an error. 
*/ + if (err == 0 && !sdd.seento) + err = ENOENT; + + if (tid != 0) { + if (err != 0) + (void) pthread_cancel(tid); + (void) close(pipefd[0]); + (void) pthread_join(tid, NULL); + } + + if (sdd.cleanup_fd != -1) { + VERIFY(0 == close(sdd.cleanup_fd)); + sdd.cleanup_fd = -1; + } + + if (!flags->dryrun && (flags->replicate || flags->doall || + flags->props)) { + /* + * write final end record. NB: want to do this even if + * there was some error, because it might not be totally + * failed. + */ + dmu_replay_record_t drr = { 0 }; + drr.drr_type = DRR_END; + if (write(outfd, &drr, sizeof (drr)) == -1) { + return (zfs_standard_error(zhp->zfs_hdl, + errno, errbuf)); + } + } + + return (err || sdd.err); + +stderr_out: + err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); +err_out: + fsavl_destroy(fsavl); + nvlist_free(fss); + fnvlist_free(sdd.snapholds); + + if (sdd.cleanup_fd != -1) + VERIFY(0 == close(sdd.cleanup_fd)); + if (tid != 0) { + (void) pthread_cancel(tid); + (void) close(pipefd[0]); + (void) pthread_join(tid, NULL); + } + return (err); +} + +int +zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, + enum lzc_send_flags flags) +{ + int err; + libzfs_handle_t *hdl = zhp->zfs_hdl; + + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + + err = lzc_send(zhp->zfs_name, from, fd, flags); + if (err != 0) { + switch (errno) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "not an earlier snapshot from the same fs")); + return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + + case ENOENT: + case ESRCH: + if (lzc_exists(zhp->zfs_name)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source (%s) does not exist"), + from); + } + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + + case EBUSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "target is busy; if a filesystem, " + "it must not be mounted")); + return (zfs_error(hdl, EZFS_BUSY, errbuf)); + + case EDQUOT: + 
case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: +#ifdef illumos + case ENOSTR: +#endif + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + return (err != 0); +} + +/* + * Routines specific to "zfs recv" + */ + +static int +recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, + boolean_t byteswap, zio_cksum_t *zc) +{ + char *cp = buf; + int rv; + int len = ilen; + + do { + rv = read(fd, cp, len); + cp += rv; + len -= rv; + } while (rv > 0); + + if (rv < 0 || len != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to read from stream")); + return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, + "cannot receive"))); + } + + if (zc) { + if (byteswap) + fletcher_4_incremental_byteswap(buf, ilen, zc); + else + fletcher_4_incremental_native(buf, ilen, zc); + } + return (0); +} + +static int +recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, + boolean_t byteswap, zio_cksum_t *zc) +{ + char *buf; + int err; + + buf = zfs_alloc(hdl, len); + if (buf == NULL) + return (ENOMEM); + + err = recv_read(hdl, fd, buf, len, byteswap, zc); + if (err != 0) { + free(buf); + return (err); + } + + err = nvlist_unpack(buf, len, nvp, 0); + free(buf); + if (err != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (malformed nvlist)")); + return (EINVAL); + } + return (0); +} + +static int +recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, + int baselen, char *newname, recvflags_t *flags) +{ + static int seq; + zfs_cmd_t zc = { 0 }; + int err; + prop_changelist_t *clp; + zfs_handle_t *zhp; + + zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); + if (zhp == NULL) + return (-1); + clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, + flags->force ? 
MS_FORCE : 0); + zfs_close(zhp); + if (clp == NULL) + return (-1); + err = changelist_prefix(clp); + if (err) + return (err); + + zc.zc_objset_type = DMU_OST_ZFS; + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + if (tryname) { + (void) strcpy(newname, tryname); + + (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value)); + + if (flags->verbose) { + (void) printf("attempting rename %s to %s\n", + zc.zc_name, zc.zc_value); + } + err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + if (err == 0) + changelist_rename(clp, name, tryname); + } else { + err = ENOENT; + } + + if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) { + seq++; + + (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u", + baselen, name, getpid(), seq); + (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); + + if (flags->verbose) { + (void) printf("failed - trying rename %s to %s\n", + zc.zc_name, zc.zc_value); + } + err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + if (err == 0) + changelist_rename(clp, name, newname); + if (err && flags->verbose) { + (void) printf("failed (%u) - " + "will try again on next pass\n", errno); + } + err = EAGAIN; + } else if (flags->verbose) { + if (err == 0) + (void) printf("success\n"); + else + (void) printf("failed (%u)\n", errno); + } + + (void) changelist_postfix(clp); + changelist_free(clp); + + return (err); +} + +static int +recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, + char *newname, recvflags_t *flags) +{ + zfs_cmd_t zc = { 0 }; + int err = 0; + prop_changelist_t *clp; + zfs_handle_t *zhp; + boolean_t defer = B_FALSE; + int spa_version; + + zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); + if (zhp == NULL) + return (-1); + clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, + flags->force ? 
MS_FORCE : 0); + if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && + zfs_spa_version(zhp, &spa_version) == 0 && + spa_version >= SPA_VERSION_USERREFS) + defer = B_TRUE; + zfs_close(zhp); + if (clp == NULL) + return (-1); + err = changelist_prefix(clp); + if (err) + return (err); + + zc.zc_objset_type = DMU_OST_ZFS; + zc.zc_defer_destroy = defer; + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + if (flags->verbose) + (void) printf("attempting destroy %s\n", zc.zc_name); + err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); + if (err == 0) { + if (flags->verbose) + (void) printf("success\n"); + changelist_remove(clp, zc.zc_name); + } + + (void) changelist_postfix(clp); + changelist_free(clp); + + /* + * Deferred destroy might destroy the snapshot or only mark it to be + * destroyed later, and it returns success in either case. + */ + if (err != 0 || (defer && zfs_dataset_exists(hdl, name, + ZFS_TYPE_SNAPSHOT))) { + err = recv_rename(hdl, name, NULL, baselen, newname, flags); + } + + return (err); +} + +typedef struct guid_to_name_data { + uint64_t guid; + char *name; + char *skip; +} guid_to_name_data_t; + +static int +guid_to_name_cb(zfs_handle_t *zhp, void *arg) +{ + guid_to_name_data_t *gtnd = arg; + int err; + + if (gtnd->skip != NULL && + strcmp(zhp->zfs_name, gtnd->skip) == 0) { + return (0); + } + + if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { + (void) strcpy(gtnd->name, zhp->zfs_name); + zfs_close(zhp); + return (EEXIST); + } + + err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); + zfs_close(zhp); + return (err); +} + +/* + * Attempt to find the local dataset associated with this guid. In the case of + * multiple matches, we attempt to find the "best" match by searching + * progressively larger portions of the hierarchy. This allows one to send a + * tree of datasets individually and guarantee that we will find the source + * guid within that hierarchy, even if there are multiple matches elsewhere. 
+ */ +static int +guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, + char *name) +{ + /* exhaustive search all local snapshots */ + char pname[ZFS_MAXNAMELEN]; + guid_to_name_data_t gtnd; + int err = 0; + zfs_handle_t *zhp; + char *cp; + + gtnd.guid = guid; + gtnd.name = name; + gtnd.skip = NULL; + + (void) strlcpy(pname, parent, sizeof (pname)); + + /* + * Search progressively larger portions of the hierarchy. This will + * select the "most local" version of the origin snapshot in the case + * that there are multiple matching snapshots in the system. + */ + while ((cp = strrchr(pname, '/')) != NULL) { + + /* Chop off the last component and open the parent */ + *cp = '\0'; + zhp = make_dataset_handle(hdl, pname); + + if (zhp == NULL) + continue; + + err = zfs_iter_children(zhp, guid_to_name_cb, >nd); + zfs_close(zhp); + if (err == EEXIST) + return (0); + + /* + * Remember the dataset that we already searched, so we + * skip it next time through. + */ + gtnd.skip = pname; + } + + return (ENOENT); +} + +/* + * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if + * guid1 is after guid2. 
+ */ +static int +created_before(libzfs_handle_t *hdl, avl_tree_t *avl, + uint64_t guid1, uint64_t guid2) +{ + nvlist_t *nvfs; + char *fsname, *snapname; + char buf[ZFS_MAXNAMELEN]; + int rv; + zfs_handle_t *guid1hdl, *guid2hdl; + uint64_t create1, create2; + + if (guid2 == 0) + return (0); + if (guid1 == 0) + return (1); + + nvfs = fsavl_find(avl, guid1, &snapname); + VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); + (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); + guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + if (guid1hdl == NULL) + return (-1); + + nvfs = fsavl_find(avl, guid2, &snapname); + VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); + (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); + guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); + if (guid2hdl == NULL) { + zfs_close(guid1hdl); + return (-1); + } + + create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG); + create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG); + + if (create1 < create2) + rv = -1; + else if (create1 > create2) + rv = +1; + else + rv = 0; + + zfs_close(guid1hdl); + zfs_close(guid2hdl); + + return (rv); +} + +static int +recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, + recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, + nvlist_t *renamed) +{ + nvlist_t *local_nv, *deleted = NULL; + avl_tree_t *local_avl; + nvpair_t *fselem, *nextfselem; + char *fromsnap; + char newname[ZFS_MAXNAMELEN]; + char guidname[32]; + int error; + boolean_t needagain, progress, recursive; + char *s1, *s2; + + VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); + + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); + + if (flags->dryrun) + return (0); + +again: + needagain = progress = B_FALSE; + + VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0)); + + if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, + recursive, &local_nv, &local_avl)) != 0) + return (error); + + 
/* + * Process deletes and renames + */ + for (fselem = nvlist_next_nvpair(local_nv, NULL); + fselem; fselem = nextfselem) { + nvlist_t *nvfs, *snaps; + nvlist_t *stream_nvfs = NULL; + nvpair_t *snapelem, *nextsnapelem; + uint64_t fromguid = 0; + uint64_t originguid = 0; + uint64_t stream_originguid = 0; + uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; + char *fsname, *stream_fsname; + + nextfselem = nvlist_next_nvpair(local_nv, fselem); + + VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); + VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); + VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); + VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", + &parent_fromsnap_guid)); + (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); + + /* + * First find the stream's fs, so we can check for + * a different origin (due to "zfs promote") + */ + for (snapelem = nvlist_next_nvpair(snaps, NULL); + snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { + uint64_t thisguid; + + VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); + stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); + + if (stream_nvfs != NULL) + break; + } + + /* check for promote */ + (void) nvlist_lookup_uint64(stream_nvfs, "origin", + &stream_originguid); + if (stream_nvfs && originguid != stream_originguid) { + switch (created_before(hdl, local_avl, + stream_originguid, originguid)) { + case 1: { + /* promote it! 
*/ + zfs_cmd_t zc = { 0 }; + nvlist_t *origin_nvfs; + char *origin_fsname; + + if (flags->verbose) + (void) printf("promoting %s\n", fsname); + + origin_nvfs = fsavl_find(local_avl, originguid, + NULL); + VERIFY(0 == nvlist_lookup_string(origin_nvfs, + "name", &origin_fsname)); + (void) strlcpy(zc.zc_value, origin_fsname, + sizeof (zc.zc_value)); + (void) strlcpy(zc.zc_name, fsname, + sizeof (zc.zc_name)); + error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + if (error == 0) + progress = B_TRUE; + break; + } + default: + break; + case -1: + fsavl_destroy(local_avl); + nvlist_free(local_nv); + return (-1); + } + /* + * We had/have the wrong origin, therefore our + * list of snapshots is wrong. Need to handle + * them on the next pass. + */ + needagain = B_TRUE; + continue; + } + + for (snapelem = nvlist_next_nvpair(snaps, NULL); + snapelem; snapelem = nextsnapelem) { + uint64_t thisguid; + char *stream_snapname; + nvlist_t *found, *props; + + nextsnapelem = nvlist_next_nvpair(snaps, snapelem); + + VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); + found = fsavl_find(stream_avl, thisguid, + &stream_snapname); + + /* check for delete */ + if (found == NULL) { + char name[ZFS_MAXNAMELEN]; + + if (!flags->force) + continue; + + (void) snprintf(name, sizeof (name), "%s@%s", + fsname, nvpair_name(snapelem)); + + error = recv_destroy(hdl, name, + strlen(fsname)+1, newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + sprintf(guidname, "%lu", thisguid); + nvlist_add_boolean(deleted, guidname); + continue; + } + + stream_nvfs = found; + + if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", + &props) && 0 == nvlist_lookup_nvlist(props, + stream_snapname, &props)) { + zfs_cmd_t zc = { 0 }; + + zc.zc_cookie = B_TRUE; /* received */ + (void) snprintf(zc.zc_name, sizeof (zc.zc_name), + "%s@%s", fsname, nvpair_name(snapelem)); + if (zcmd_write_src_nvlist(hdl, &zc, + props) == 0) { + (void) zfs_ioctl(hdl, + ZFS_IOC_SET_PROP, &zc); + 
zcmd_free_nvlists(&zc); + } + } + + /* check for different snapname */ + if (strcmp(nvpair_name(snapelem), + stream_snapname) != 0) { + char name[ZFS_MAXNAMELEN]; + char tryname[ZFS_MAXNAMELEN]; + + (void) snprintf(name, sizeof (name), "%s@%s", + fsname, nvpair_name(snapelem)); + (void) snprintf(tryname, sizeof (name), "%s@%s", + fsname, stream_snapname); + + error = recv_rename(hdl, name, tryname, + strlen(fsname)+1, newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + } + + if (strcmp(stream_snapname, fromsnap) == 0) + fromguid = thisguid; + } + + /* check for delete */ + if (stream_nvfs == NULL) { + if (!flags->force) + continue; + + error = recv_destroy(hdl, fsname, strlen(tofs)+1, + newname, flags); + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + sprintf(guidname, "%lu", parent_fromsnap_guid); + nvlist_add_boolean(deleted, guidname); + continue; + } + + if (fromguid == 0) { + if (flags->verbose) { + (void) printf("local fs %s does not have " + "fromsnap (%s in stream); must have " + "been deleted locally; ignoring\n", + fsname, fromsnap); + } + continue; + } + + VERIFY(0 == nvlist_lookup_string(stream_nvfs, + "name", &stream_fsname)); + VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, + "parentfromsnap", &stream_parent_fromsnap_guid)); + + s1 = strrchr(fsname, '/'); + s2 = strrchr(stream_fsname, '/'); + + /* + * Check if we're going to rename based on parent guid change + * and the current parent guid was also deleted. If it was then + * rename will fail and is likely unneeded, so avoid this and + * force an early retry to determine the new + * parent_fromsnap_guid. + */ + if (stream_parent_fromsnap_guid != 0 && + parent_fromsnap_guid != 0 && + stream_parent_fromsnap_guid != parent_fromsnap_guid) { + sprintf(guidname, "%lu", parent_fromsnap_guid); + if (nvlist_exists(deleted, guidname)) { + progress = B_TRUE; + needagain = B_TRUE; + goto doagain; + } + } + + /* + * Check for rename. 
If the exact receive path is specified, it + * does not count as a rename, but we still need to check the + * datasets beneath it. + */ + if ((stream_parent_fromsnap_guid != 0 && + parent_fromsnap_guid != 0 && + stream_parent_fromsnap_guid != parent_fromsnap_guid) || + ((flags->isprefix || strcmp(tofs, fsname) != 0) && + (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { + nvlist_t *parent; + char tryname[ZFS_MAXNAMELEN]; + + parent = fsavl_find(local_avl, + stream_parent_fromsnap_guid, NULL); + /* + * NB: parent might not be found if we used the + * tosnap for stream_parent_fromsnap_guid, + * because the parent is a newly-created fs; + * we'll be able to rename it after we recv the + * new fs. + */ + if (parent != NULL) { + char *pname; + + VERIFY(0 == nvlist_lookup_string(parent, "name", + &pname)); + (void) snprintf(tryname, sizeof (tryname), + "%s%s", pname, strrchr(stream_fsname, '/')); + } else { + tryname[0] = '\0'; + if (flags->verbose) { + (void) printf("local fs %s new parent " + "not found\n", fsname); + } + } + + newname[0] = '\0'; + + error = recv_rename(hdl, fsname, tryname, + strlen(tofs)+1, newname, flags); + + if (renamed != NULL && newname[0] != '\0') { + VERIFY(0 == nvlist_add_boolean(renamed, + newname)); + } + + if (error) + needagain = B_TRUE; + else + progress = B_TRUE; + } + } + +doagain: + fsavl_destroy(local_avl); + nvlist_free(local_nv); + nvlist_free(deleted); + + if (needagain && progress) { + /* do another pass to fix up temporary names */ + if (flags->verbose) + (void) printf("another pass:\n"); + goto again; + } + + return (needagain); +} + +static int +zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, + recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc, + char **top_zfs, int cleanup_fd, uint64_t *action_handlep) +{ + nvlist_t *stream_nv = NULL; + avl_tree_t *stream_avl = NULL; + char *fromsnap = NULL; + char *cp; + char tofs[ZFS_MAXNAMELEN]; + char sendfs[ZFS_MAXNAMELEN]; + char errbuf[1024]; + 
dmu_replay_record_t drre; + int error; + boolean_t anyerr = B_FALSE; + boolean_t softerr = B_FALSE; + boolean_t recursive; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + + assert(drr->drr_type == DRR_BEGIN); + assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); + assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == + DMU_COMPOUNDSTREAM); + + /* + * Read in the nvlist from the stream. + */ + if (drr->drr_payloadlen != 0) { + error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, + &stream_nv, flags->byteswap, zc); + if (error) { + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + } + + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); + + if (recursive && strchr(destname, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot specify snapshot name for multi-snapshot stream")); + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + + /* + * Read in the end record and verify checksum. 
+ */ + if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), + flags->byteswap, NULL))) + goto out; + if (flags->byteswap) { + drre.drr_type = BSWAP_32(drre.drr_type); + drre.drr_u.drr_end.drr_checksum.zc_word[0] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); + drre.drr_u.drr_end.drr_checksum.zc_word[1] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); + drre.drr_u.drr_end.drr_checksum.zc_word[2] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); + drre.drr_u.drr_end.drr_checksum.zc_word[3] = + BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); + } + if (drre.drr_type != DRR_END) { + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incorrect header checksum")); + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + + (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); + + if (drr->drr_payloadlen != 0) { + nvlist_t *stream_fss; + + VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", + &stream_fss)); + if ((stream_avl = fsavl_create(stream_fss)) == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "couldn't allocate avl tree")); + error = zfs_error(hdl, EZFS_NOMEM, errbuf); + goto out; + } + + if (fromsnap != NULL) { + nvlist_t *renamed = NULL; + nvpair_t *pair = NULL; + + (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); + if (flags->isprefix) { + struct drr_begin *drrb = &drr->drr_u.drr_begin; + int i; + + if (flags->istail) { + cp = strrchr(drrb->drr_toname, '/'); + if (cp == NULL) { + (void) strlcat(tofs, "/", + ZFS_MAXNAMELEN); + i = 0; + } else { + i = (cp - drrb->drr_toname); + } + } else { + i = strcspn(drrb->drr_toname, "/@"); + } + /* zfs_receive_one() will create_parents() */ + (void) strlcat(tofs, &drrb->drr_toname[i], + ZFS_MAXNAMELEN); + *strchr(tofs, '@') = '\0'; + } + + if (recursive && !flags->dryrun && !flags->nomount) { + VERIFY(0 == nvlist_alloc(&renamed, + 
NV_UNIQUE_NAME, 0)); + } + + softerr = recv_incremental_replication(hdl, tofs, flags, + stream_nv, stream_avl, renamed); + + /* Unmount renamed filesystems before receiving. */ + while ((pair = nvlist_next_nvpair(renamed, + pair)) != NULL) { + zfs_handle_t *zhp; + prop_changelist_t *clp = NULL; + + zhp = zfs_open(hdl, nvpair_name(pair), + ZFS_TYPE_FILESYSTEM); + if (zhp != NULL) { + clp = changelist_gather(zhp, + ZFS_PROP_MOUNTPOINT, 0, 0); + zfs_close(zhp); + if (clp != NULL) { + softerr |= + changelist_prefix(clp); + changelist_free(clp); + } + } + } + + nvlist_free(renamed); + } + } + + /* + * Get the fs specified by the first path in the stream (the top level + * specified by 'zfs send') and pass it to each invocation of + * zfs_receive_one(). + */ + (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, + ZFS_MAXNAMELEN); + if ((cp = strchr(sendfs, '@')) != NULL) + *cp = '\0'; + + /* Finally, receive each contained stream */ + do { + /* + * we should figure out if it has a recoverable + * error, in which case do a recv_skip() and drive on. + * Note, if we fail due to already having this guid, + * zfs_receive_one() will take care of it (ie, + * recv_skip() and return 0). + */ + error = zfs_receive_impl(hdl, destname, flags, fd, + sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, + action_handlep); + if (error == ENODATA) { + error = 0; + break; + } + anyerr |= error; + } while (error == 0); + + if (drr->drr_payloadlen != 0 && fromsnap != NULL) { + /* + * Now that we have the fs's they sent us, try the + * renames again. 
+ */ + softerr = recv_incremental_replication(hdl, tofs, flags, + stream_nv, stream_avl, NULL); + } + +out: + fsavl_destroy(stream_avl); + if (stream_nv) + nvlist_free(stream_nv); + if (softerr) + error = -2; + if (anyerr) + error = -1; + return (error); +} + +static void +trunc_prop_errs(int truncated) +{ + ASSERT(truncated != 0); + + if (truncated == 1) + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "1 more property could not be set\n")); + else + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "%d more properties could not be set\n"), truncated); +} + +static int +recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) +{ + dmu_replay_record_t *drr; + void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive:")); + + /* XXX would be great to use lseek if possible... */ + drr = buf; + + while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), + byteswap, NULL) == 0) { + if (byteswap) + drr->drr_type = BSWAP_32(drr->drr_type); + + switch (drr->drr_type) { + case DRR_BEGIN: + /* NB: not to be used on v2 stream packages */ + if (drr->drr_payloadlen != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid substream header")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + break; + + case DRR_END: + free(buf); + return (0); + + case DRR_OBJECT: + if (byteswap) { + drr->drr_u.drr_object.drr_bonuslen = + BSWAP_32(drr->drr_u.drr_object. 
+ drr_bonuslen); + } + (void) recv_read(hdl, fd, buf, + P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), + B_FALSE, NULL); + break; + + case DRR_WRITE: + if (byteswap) { + drr->drr_u.drr_write.drr_length = + BSWAP_64(drr->drr_u.drr_write.drr_length); + } + (void) recv_read(hdl, fd, buf, + drr->drr_u.drr_write.drr_length, B_FALSE, NULL); + break; + case DRR_SPILL: + if (byteswap) { + drr->drr_u.drr_write.drr_length = + BSWAP_64(drr->drr_u.drr_spill.drr_length); + } + (void) recv_read(hdl, fd, buf, + drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); + break; + case DRR_WRITE_EMBEDDED: + if (byteswap) { + drr->drr_u.drr_write_embedded.drr_psize = + BSWAP_32(drr->drr_u.drr_write_embedded. + drr_psize); + } + (void) recv_read(hdl, fd, buf, + P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, + 8), B_FALSE, NULL); + break; + case DRR_WRITE_BYREF: + case DRR_FREEOBJECTS: + case DRR_FREE: + break; + + default: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid record type")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + } + + free(buf); + return (-1); +} + +/* + * Restores a backup of tosnap from the file descriptor specified by infd. 
+ */ +static int +zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, + recvflags_t *flags, dmu_replay_record_t *drr, + dmu_replay_record_t *drr_noswap, const char *sendfs, + nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, + uint64_t *action_handlep) +{ + zfs_cmd_t zc = { 0 }; + time_t begin_time; + int ioctl_err, ioctl_errno, err; + char *cp; + struct drr_begin *drrb = &drr->drr_u.drr_begin; + char errbuf[1024]; + char prop_errbuf[1024]; + const char *chopprefix; + boolean_t newfs = B_FALSE; + boolean_t stream_wantsnewfs; + uint64_t parent_snapguid = 0; + prop_changelist_t *clp = NULL; + nvlist_t *snapprops_nvlist = NULL; + zprop_errflags_t prop_errflags; + boolean_t recursive; + + begin_time = time(NULL); + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); + + if (stream_avl != NULL) { + char *snapname; + nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, + &snapname); + nvlist_t *props; + int ret; + + (void) nvlist_lookup_uint64(fs, "parentfromsnap", + &parent_snapguid); + err = nvlist_lookup_nvlist(fs, "props", &props); + if (err) + VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + + if (flags->canmountoff) { + VERIFY(0 == nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); + } + ret = zcmd_write_src_nvlist(hdl, &zc, props); + if (err) + nvlist_free(props); + + if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { + VERIFY(0 == nvlist_lookup_nvlist(props, + snapname, &snapprops_nvlist)); + } + + if (ret != 0) + return (-1); + } + + cp = NULL; + + /* + * Determine how much of the snapshot name stored in the stream + * we are going to tack on to the name they specified on the + * command line, and how much we are going to chop off. + * + * If they specified a snapshot, chop the entire name stored in + * the stream. 
+ */ + if (flags->istail) { + /* + * A filesystem was specified with -e. We want to tack on only + * the tail of the sent snapshot path. + */ + if (strchr(tosnap, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "argument - snapshot not allowed with -e")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + + chopprefix = strrchr(sendfs, '/'); + + if (chopprefix == NULL) { + /* + * The tail is the poolname, so we need to + * prepend a path separator. + */ + int len = strlen(drrb->drr_toname); + cp = malloc(len + 2); + cp[0] = '/'; + (void) strcpy(&cp[1], drrb->drr_toname); + chopprefix = cp; + } else { + chopprefix = drrb->drr_toname + (chopprefix - sendfs); + } + } else if (flags->isprefix) { + /* + * A filesystem was specified with -d. We want to tack on + * everything but the first element of the sent snapshot path + * (all but the pool name). + */ + if (strchr(tosnap, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "argument - snapshot not allowed with -d")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + + chopprefix = strchr(drrb->drr_toname, '/'); + if (chopprefix == NULL) + chopprefix = strchr(drrb->drr_toname, '@'); + } else if (strchr(tosnap, '@') == NULL) { + /* + * If a filesystem was specified without -d or -e, we want to + * tack on everything after the fs specified by 'zfs send'. + */ + chopprefix = drrb->drr_toname + strlen(sendfs); + } else { + /* A snapshot was specified as an exact path (no -d or -e). 
*/ + if (recursive) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot specify snapshot name for multi-snapshot " + "stream")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); + } + + ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); + ASSERT(chopprefix > drrb->drr_toname); + ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); + ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || + chopprefix[0] == '\0'); + + /* + * Determine name of destination snapshot, store in zc_value. + */ + (void) strcpy(zc.zc_value, tosnap); + (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); +#ifdef __FreeBSD__ + if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) + zfs_ioctl_version = get_zfs_ioctl_version(); + /* + * For forward compatibility hide tosnap in zc_value + */ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) + (void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap); +#endif + free(cp); + if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { + zcmd_free_nvlists(&zc); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + + /* + * Determine the name of the origin snapshot, store in zc_string. 
+ */ + if (drrb->drr_flags & DRR_FLAG_CLONE) { + if (guid_to_name(hdl, zc.zc_value, + drrb->drr_fromguid, zc.zc_string) != 0) { + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "local origin for clone %s does not exist"), + zc.zc_value); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + if (flags->verbose) + (void) printf("found clone origin %s\n", zc.zc_string); + } + + stream_wantsnewfs = (drrb->drr_fromguid == 0 || + (drrb->drr_flags & DRR_FLAG_CLONE)); + + if (stream_wantsnewfs) { + /* + * if the parent fs does not exist, look for it based on + * the parent snap GUID + */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive new filesystem stream")); + + (void) strcpy(zc.zc_name, zc.zc_value); + cp = strrchr(zc.zc_name, '/'); + if (cp) + *cp = '\0'; + if (cp && + !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + char suffix[ZFS_MAXNAMELEN]; + (void) strcpy(suffix, strrchr(zc.zc_value, '/')); + if (guid_to_name(hdl, zc.zc_name, parent_snapguid, + zc.zc_value) == 0) { + *strchr(zc.zc_value, '@') = '\0'; + (void) strcat(zc.zc_value, suffix); + } + } + } else { + /* + * if the fs does not exist, look for it based on the + * fromsnap GUID + */ + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive incremental stream")); + + (void) strcpy(zc.zc_name, zc.zc_value); + *strchr(zc.zc_name, '@') = '\0'; + + /* + * If the exact receive path was specified and this is the + * topmost path in the stream, then if the fs does not exist we + * should look no further. 
+ */ + if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + + strlen(sendfs)) != '\0' && *chopprefix != '@')) && + !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + char snap[ZFS_MAXNAMELEN]; + (void) strcpy(snap, strchr(zc.zc_value, '@')); + if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, + zc.zc_value) == 0) { + *strchr(zc.zc_value, '@') = '\0'; + (void) strcat(zc.zc_value, snap); + } + } + } + + (void) strcpy(zc.zc_name, zc.zc_value); + *strchr(zc.zc_name, '@') = '\0'; + + if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + zfs_handle_t *zhp; + + /* + * Destination fs exists. Therefore this should either + * be an incremental, or the stream specifies a new fs + * (full stream or clone) and they want us to blow it + * away (and have therefore specified -F and removed any + * snapshots). + */ + if (stream_wantsnewfs) { + if (!flags->force) { + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' exists\n" + "must specify -F to overwrite it"), + zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, + &zc) == 0) { + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination has snapshots (eg. 
%s)\n" + "must destroy them to overwrite it"), + zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + } + + if ((zhp = zfs_open(hdl, zc.zc_name, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { + zcmd_free_nvlists(&zc); + return (-1); + } + + if (stream_wantsnewfs && + zhp->zfs_dmustats.dds_origin[0]) { + zcmd_free_nvlists(&zc); + zfs_close(zhp); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' is a clone\n" + "must destroy it to overwrite it"), + zc.zc_name); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + } + + if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && + stream_wantsnewfs) { + /* We can't do online recv in this case */ + clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); + if (clp == NULL) { + zfs_close(zhp); + zcmd_free_nvlists(&zc); + return (-1); + } + if (changelist_prefix(clp) != 0) { + changelist_free(clp); + zfs_close(zhp); + zcmd_free_nvlists(&zc); + return (-1); + } + } + zfs_close(zhp); + } else { + /* + * Destination filesystem does not exist. Therefore we better + * be creating a new filesystem (either from a full backup, or + * a clone). It would therefore be invalid if the user + * specified only the pool name (i.e. if the destination name + * contained no slash character). + */ + if (!stream_wantsnewfs || + (cp = strrchr(zc.zc_name, '/')) == NULL) { + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination '%s' does not exist"), zc.zc_name); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + + /* + * Trim off the final dataset component so we perform the + * recvbackup ioctl to the filesystems's parent. 
+ */ + *cp = '\0'; + + if (flags->isprefix && !flags->istail && !flags->dryrun && + create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { + zcmd_free_nvlists(&zc); + return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); + } + + newfs = B_TRUE; + } + + zc.zc_begin_record = drr_noswap->drr_u.drr_begin; + zc.zc_cookie = infd; + zc.zc_guid = flags->force; + if (flags->verbose) { + (void) printf("%s %s stream of %s into %s\n", + flags->dryrun ? "would receive" : "receiving", + drrb->drr_fromguid ? "incremental" : "full", + drrb->drr_toname, zc.zc_value); + (void) fflush(stdout); + } + + if (flags->dryrun) { + zcmd_free_nvlists(&zc); + return (recv_skip(hdl, infd, flags->byteswap)); + } + + zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; + zc.zc_nvlist_dst_size = sizeof (prop_errbuf); + zc.zc_cleanup_fd = cleanup_fd; + zc.zc_action_handle = *action_handlep; + + err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); + ioctl_errno = errno; + prop_errflags = (zprop_errflags_t)zc.zc_obj; + + if (err == 0) { + nvlist_t *prop_errors; + VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size, &prop_errors, 0)); + + nvpair_t *prop_err = NULL; + + while ((prop_err = nvlist_next_nvpair(prop_errors, + prop_err)) != NULL) { + char tbuf[1024]; + zfs_prop_t prop; + int intval; + + prop = zfs_name_to_prop(nvpair_name(prop_err)); + (void) nvpair_value_int32(prop_err, &intval); + if (strcmp(nvpair_name(prop_err), + ZPROP_N_MORE_ERRORS) == 0) { + trunc_prop_errs(intval); + break; + } else { + (void) snprintf(tbuf, sizeof (tbuf), + dgettext(TEXT_DOMAIN, + "cannot receive %s property on %s"), + nvpair_name(prop_err), zc.zc_name); + zfs_setprop_error(hdl, prop, intval, tbuf); + } + } + nvlist_free(prop_errors); + } + + zc.zc_nvlist_dst = 0; + zc.zc_nvlist_dst_size = 0; + zcmd_free_nvlists(&zc); + + if (err == 0 && snapprops_nvlist) { + zfs_cmd_t zc2 = { 0 }; + + (void) strcpy(zc2.zc_name, zc.zc_value); + zc2.zc_cookie = B_TRUE; /* received */ + if 
(zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { + (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); + zcmd_free_nvlists(&zc2); + } + } + + if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { + /* + * It may be that this snapshot already exists, + * in which case we want to consume & ignore it + * rather than failing. + */ + avl_tree_t *local_avl; + nvlist_t *local_nv, *fs; + cp = strchr(zc.zc_value, '@'); + + /* + * XXX Do this faster by just iterating over snaps in + * this fs. Also if zc_value does not exist, we will + * get a strange "does not exist" error message. + */ + *cp = '\0'; + if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, + &local_nv, &local_avl) == 0) { + *cp = '@'; + fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); + fsavl_destroy(local_avl); + nvlist_free(local_nv); + + if (fs != NULL) { + if (flags->verbose) { + (void) printf("snap %s already exists; " + "ignoring\n", zc.zc_value); + } + err = ioctl_err = recv_skip(hdl, infd, + flags->byteswap); + } + } + *cp = '@'; + } + + if (ioctl_err != 0) { + switch (ioctl_errno) { + case ENODEV: + cp = strchr(zc.zc_value, '@'); + *cp = '\0'; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "most recent snapshot of %s does not\n" + "match incremental source"), zc.zc_value); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + *cp = '@'; + break; + case ETXTBSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination %s has been modified\n" + "since most recent snapshot"), zc.zc_name); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + break; + case EEXIST: + cp = strchr(zc.zc_value, '@'); + if (newfs) { + /* it's the containing fs that exists */ + *cp = '\0'; + } + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination already exists")); + (void) zfs_error_fmt(hdl, EZFS_EXISTS, + dgettext(TEXT_DOMAIN, "cannot restore to %s"), + zc.zc_value); + *cp = '@'; + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; + case ECKSUM: + zfs_error_aux(hdl, 
dgettext(TEXT_DOMAIN, + "invalid stream (checksum mismatch)")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded to receive this stream.")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EDQUOT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination %s space quota exceeded"), zc.zc_name); + (void) zfs_error(hdl, EZFS_NOSPC, errbuf); + break; + default: + (void) zfs_standard_error(hdl, ioctl_errno, errbuf); + } + } + + /* + * Mount the target filesystem (if created). Also mount any + * children of the target filesystem if we did a replication + * receive (indicated by stream_avl being non-NULL). + */ + cp = strchr(zc.zc_value, '@'); + if (cp && (ioctl_err == 0 || !newfs)) { + zfs_handle_t *h; + + *cp = '\0'; + h = zfs_open(hdl, zc.zc_value, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (h != NULL) { + if (h->zfs_type == ZFS_TYPE_VOLUME) { + *cp = '@'; + } else if (newfs || stream_avl) { + /* + * Track the first/top of hierarchy fs, + * for mounting and sharing later. 
+ */ + if (top_zfs && *top_zfs == NULL) + *top_zfs = zfs_strdup(hdl, zc.zc_value); + } + zfs_close(h); + } + *cp = '@'; + } + + if (clp) { + err |= changelist_postfix(clp); + changelist_free(clp); + } + + if (prop_errflags & ZPROP_ERR_NOCLEAR) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " + "failed to clear unreceived properties on %s"), + zc.zc_name); + (void) fprintf(stderr, "\n"); + } + if (prop_errflags & ZPROP_ERR_NORESTORE) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " + "failed to restore original properties on %s"), + zc.zc_name); + (void) fprintf(stderr, "\n"); + } + + if (err || ioctl_err) + return (-1); + + *action_handlep = zc.zc_action_handle; + + if (flags->verbose) { + char buf1[64]; + char buf2[64]; + uint64_t bytes = zc.zc_cookie; + time_t delta = time(NULL) - begin_time; + if (delta == 0) + delta = 1; + zfs_nicenum(bytes, buf1, sizeof (buf1)); + zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); + + (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", + buf1, delta, buf2); + } + + return (0); +} + +static int +zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, + int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, + char **top_zfs, int cleanup_fd, uint64_t *action_handlep) +{ + int err; + dmu_replay_record_t drr, drr_noswap; + struct drr_begin *drrb = &drr.drr_u.drr_begin; + char errbuf[1024]; + zio_cksum_t zcksum = { 0 }; + uint64_t featureflags; + int hdrtype; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive")); + + if (flags->isprefix && + !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " + "(%s) does not exist"), tosnap); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } + + /* read in the BEGIN record */ + if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, + &zcksum))) + return (err); + + if (drr.drr_type == DRR_END || drr.drr_type == 
BSWAP_32(DRR_END)) { + /* It's the double end record at the end of a package */ + return (ENODATA); + } + + /* the kernel needs the non-byteswapped begin record */ + drr_noswap = drr; + + flags->byteswap = B_FALSE; + if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { + /* + * We computed the checksum in the wrong byteorder in + * recv_read() above; do it again correctly. + */ + bzero(&zcksum, sizeof (zio_cksum_t)); + fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); + flags->byteswap = B_TRUE; + + drr.drr_type = BSWAP_32(drr.drr_type); + drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); + drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_flags = BSWAP_32(drrb->drr_flags); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); + } + + if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (bad magic number)")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); + + if (!DMU_STREAM_SUPPORTED(featureflags) || + (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "stream has unsupported feature, feature flags = %lx"), + featureflags); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + if (strchr(drrb->drr_toname, '@') == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "stream (bad snapshot name)")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { + char nonpackage_sendfs[ZFS_MAXNAMELEN]; + if (sendfs == NULL) { + /* + * We were not called from zfs_receive_package(). 
Get + * the fs specified by 'zfs send'. + */ + char *cp; + (void) strlcpy(nonpackage_sendfs, + drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); + if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) + *cp = '\0'; + sendfs = nonpackage_sendfs; + } + return (zfs_receive_one(hdl, infd, tosnap, flags, + &drr, &drr_noswap, sendfs, stream_nv, stream_avl, + top_zfs, cleanup_fd, action_handlep)); + } else { + assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM); + return (zfs_receive_package(hdl, infd, tosnap, flags, + &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); + } +} + +/* + * Restores a backup of tosnap from the file descriptor specified by infd. + * Return 0 on total success, -2 if some things couldn't be + * destroyed/renamed/promoted, -1 if some things couldn't be received. + * (-1 will override -2). + */ +int +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, + int infd, avl_tree_t *stream_avl) +{ + char *top_zfs = NULL; + int err; + int cleanup_fd; + uint64_t action_handle = 0; + + cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); + VERIFY(cleanup_fd >= 0); + + err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, + stream_avl, &top_zfs, cleanup_fd, &action_handle); + + VERIFY(0 == close(cleanup_fd)); + + if (err == 0 && !flags->nomount && top_zfs) { + zfs_handle_t *zhp; + prop_changelist_t *clp; + + zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM); + if (zhp != NULL) { + clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, + CL_GATHER_MOUNT_ALWAYS, 0); + zfs_close(zhp); + if (clp != NULL) { + /* mount and share received datasets */ + err = changelist_postfix(clp); + changelist_free(clp); + } + } + if (zhp == NULL || clp == NULL || err) + err = -1; + } + if (top_zfs) + free(top_zfs); + + return (err); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c new file mode 100644 index 0000000..906883c --- /dev/null +++ 
b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c @@ -0,0 +1,467 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. + */ + +/* + * This file contains the functions which analyze the status of a pool. This + * includes both the status of an active pool, as well as the status of exported + * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of + * the pool. This status is independent (to a certain degree) from the state of + * the pool. A pool's state describes only whether or not it is capable of + * providing the necessary fault tolerance for data. The status describes the + * overall status of devices. A pool that is online can still have a device + * that is experiencing errors. + * + * Only a subset of the possible faults can be detected using 'zpool status', + * and not all possible errors correspond to an FMA message ID. The explanation + * is left up to the caller, depending on whether it is a live pool or an + * import. 
+ */ + +#include <libzfs.h> +#include <string.h> +#include <unistd.h> +#include "libzfs_impl.h" +#include "zfeature_common.h" + +/* + * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines + * in libzfs.h. Note that there are some status results which go past the end + * of this table, and hence have no associated message ID. + */ +static char *zfs_msgid_table[] = { + "ZFS-8000-14", + "ZFS-8000-2Q", + "ZFS-8000-3C", + "ZFS-8000-4J", + "ZFS-8000-5E", + "ZFS-8000-6X", + "ZFS-8000-72", + "ZFS-8000-8A", + "ZFS-8000-9P", + "ZFS-8000-A5", + "ZFS-8000-EY", + "ZFS-8000-HC", + "ZFS-8000-JQ", + "ZFS-8000-K4", +}; + +#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) + +/* ARGSUSED */ +static int +vdev_missing(vdev_stat_t *vs, uint_t vsc) +{ + return (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_OPEN_FAILED); +} + +/* ARGSUSED */ +static int +vdev_faulted(vdev_stat_t *vs, uint_t vsc) +{ + return (vs->vs_state == VDEV_STATE_FAULTED); +} + +/* ARGSUSED */ +static int +vdev_errors(vdev_stat_t *vs, uint_t vsc) +{ + return (vs->vs_state == VDEV_STATE_DEGRADED || + vs->vs_read_errors != 0 || vs->vs_write_errors != 0 || + vs->vs_checksum_errors != 0); +} + +/* ARGSUSED */ +static int +vdev_broken(vdev_stat_t *vs, uint_t vsc) +{ + return (vs->vs_state == VDEV_STATE_CANT_OPEN); +} + +/* ARGSUSED */ +static int +vdev_offlined(vdev_stat_t *vs, uint_t vsc) +{ + return (vs->vs_state == VDEV_STATE_OFFLINE); +} + +/* ARGSUSED */ +static int +vdev_removed(vdev_stat_t *vs, uint_t vsc) +{ + return (vs->vs_state == VDEV_STATE_REMOVED); +} + +static int +vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) +{ + return (VDEV_STAT_VALID(vs_physical_ashift, vsc) && + vs->vs_configured_ashift < vs->vs_physical_ashift); +} + +/* + * Detect if any leaf devices that have seen errors or could not be opened. 
+ */ +static boolean_t +find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), + boolean_t ignore_replacing) +{ + nvlist_t **child; + vdev_stat_t *vs; + uint_t c, vsc, children; + + /* + * Ignore problems within a 'replacing' vdev, since we're presumably in + * the process of repairing any such errors, and don't want to call them + * out again. We'll pick up the fact that a resilver is happening + * later. + */ + if (ignore_replacing == B_TRUE) { + char *type; + + verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, + &type) == 0); + if (strcmp(type, VDEV_TYPE_REPLACING) == 0) + return (B_FALSE); + } + + if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, + &children) == 0) { + for (c = 0; c < children; c++) + if (find_vdev_problem(child[c], func, ignore_replacing)) + return (B_TRUE); + } else { + verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + + if (func(vs, vsc) != 0) + return (B_TRUE); + } + + /* + * Check any L2 cache devs + */ + if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, + &children) == 0) { + for (c = 0; c < children; c++) + if (find_vdev_problem(child[c], func, ignore_replacing)) + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * Active pool health status. + * + * To determine the status for a pool, we make several passes over the config, + * picking the most egregious error we find. In order of importance, we do the + * following: + * + * - Check for a complete and valid configuration + * - Look for any faulted or missing devices in a non-replicated config + * - Check for any data errors + * - Check for any faulted or missing devices in a replicated config + * - Look for any devices showing errors + * - Check for any resilvering devices + * + * There can obviously be multiple errors within a single pool, so this routine + * only picks the most damaging of all the current errors to report. 
+ */ +static zpool_status_t +check_status(nvlist_t *config, boolean_t isimport) +{ + nvlist_t *nvroot; + vdev_stat_t *vs; + pool_scan_stat_t *ps = NULL; + uint_t vsc, psc; + uint64_t nerr; + uint64_t version; + uint64_t stateval; + uint64_t suspended; + uint64_t hostid = 0; + + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, + &stateval) == 0); + + /* + * Currently resilvering a vdev + */ + (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, + (uint64_t **)&ps, &psc); + if (ps && ps->pss_func == POOL_SCAN_RESILVER && + ps->pss_state == DSS_SCANNING) + return (ZPOOL_STATUS_RESILVERING); + + /* + * Pool last accessed by another system. + */ + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); + if (hostid != 0 && (unsigned long)hostid != gethostid() && + stateval == POOL_STATE_ACTIVE) + return (ZPOOL_STATUS_HOSTID_MISMATCH); + + /* + * Newer on-disk version. + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_VERSION_NEWER) + return (ZPOOL_STATUS_VERSION_NEWER); + + /* + * Unsupported feature(s). + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_UNSUP_FEAT) { + nvlist_t *nvinfo; + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, + &nvinfo) == 0); + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) + return (ZPOOL_STATUS_UNSUP_FEAT_WRITE); + return (ZPOOL_STATUS_UNSUP_FEAT_READ); + } + + /* + * Check that the config is complete. + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) + return (ZPOOL_STATUS_BAD_GUID_SUM); + + /* + * Check whether the pool has suspended due to failed I/O. 
+ */ + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, + &suspended) == 0) { + if (suspended == ZIO_FAILURE_MODE_CONTINUE) + return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); + return (ZPOOL_STATUS_IO_FAILURE_WAIT); + } + + /* + * Could not read a log. + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_BAD_LOG) { + return (ZPOOL_STATUS_BAD_LOG); + } + + /* + * Bad devices in non-replicated config. + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + return (ZPOOL_STATUS_FAULTED_DEV_NR); + + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + return (ZPOOL_STATUS_MISSING_DEV_NR); + + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + return (ZPOOL_STATUS_CORRUPT_LABEL_NR); + + /* + * Corrupted pool metadata + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_CORRUPT_DATA) + return (ZPOOL_STATUS_CORRUPT_POOL); + + /* + * Persistent data errors. + */ + if (!isimport) { + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, + &nerr) == 0 && nerr != 0) + return (ZPOOL_STATUS_CORRUPT_DATA); + } + + /* + * Missing devices in a replicated config. + */ + if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + return (ZPOOL_STATUS_FAULTED_DEV_R); + if (find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + return (ZPOOL_STATUS_MISSING_DEV_R); + if (find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + return (ZPOOL_STATUS_CORRUPT_LABEL_R); + + /* + * Devices with errors + */ + if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE)) + return (ZPOOL_STATUS_FAILING_DEV); + + /* + * Offlined devices + */ + if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE)) + return (ZPOOL_STATUS_OFFLINE_DEV); + + /* + * Removed device + */ + if (find_vdev_problem(nvroot, vdev_removed, B_TRUE)) + return (ZPOOL_STATUS_REMOVED_DEV); + + /* + * Suboptimal, but usable, ashift configuration. 
+ */ + if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE)) + return (ZPOOL_STATUS_NON_NATIVE_ASHIFT); + + /* + * Outdated, but usable, version + */ + if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) + return (ZPOOL_STATUS_VERSION_OLDER); + + /* + * Usable pool with disabled features + */ + if (version >= SPA_VERSION_FEATURES) { + int i; + nvlist_t *feat; + + if (isimport) { + feat = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_LOAD_INFO); + feat = fnvlist_lookup_nvlist(feat, + ZPOOL_CONFIG_ENABLED_FEAT); + } else { + feat = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_FEATURE_STATS); + } + + for (i = 0; i < SPA_FEATURES; i++) { + zfeature_info_t *fi = &spa_feature_table[i]; + if (!nvlist_exists(feat, fi->fi_guid)) + return (ZPOOL_STATUS_FEAT_DISABLED); + } + } + + return (ZPOOL_STATUS_OK); +} + +zpool_status_t +zpool_get_status(zpool_handle_t *zhp, char **msgid) +{ + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); + + if (ret >= NMSGID) + *msgid = NULL; + else + *msgid = zfs_msgid_table[ret]; + + return (ret); +} + +zpool_status_t +zpool_import_status(nvlist_t *config, char **msgid) +{ + zpool_status_t ret = check_status(config, B_TRUE); + + if (ret >= NMSGID) + *msgid = NULL; + else + *msgid = zfs_msgid_table[ret]; + + return (ret); +} + +static void +dump_ddt_stat(const ddt_stat_t *dds, int h) +{ + char refcnt[6]; + char blocks[6], lsize[6], psize[6], dsize[6]; + char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; + + if (dds == NULL || dds->dds_blocks == 0) + return; + + if (h == -1) + (void) strcpy(refcnt, "Total"); + else + zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); + + zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); + zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); + zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); + zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); + zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); + zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof 
(ref_lsize)); + zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); + zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + refcnt, + blocks, lsize, psize, dsize, + ref_blocks, ref_lsize, ref_psize, ref_dsize); +} + +/* + * Print the DDT histogram and the column totals. + */ +void +zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) +{ + int h; + + (void) printf("\n"); + + (void) printf("bucket " + " allocated " + " referenced \n"); + (void) printf("______ " + "______________________________ " + "______________________________\n"); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + "refcnt", + "blocks", "LSIZE", "PSIZE", "DSIZE", + "blocks", "LSIZE", "PSIZE", "DSIZE"); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + "------", + "------", "-----", "-----", "-----", + "------", "-----", "-----", "-----"); + + for (h = 0; h < 64; h++) + dump_ddt_stat(&ddh->ddh_stat[h], h); + + dump_ddt_stat(dds_total, -1); + + (void) printf("\n"); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c new file mode 100644 index 0000000..3b59914 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c @@ -0,0 +1,1546 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * Internal utility routines for the ZFS library. + */ + +#include <sys/param.h> +#include <sys/linker.h> +#include <sys/module.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <ctype.h> +#include <math.h> +#include <sys/mnttab.h> +#include <sys/mntent.h> +#include <sys/types.h> + +#include <libzfs.h> +#include <libzfs_core.h> + +#include "libzfs_impl.h" +#include "zfs_prop.h" +#include "zfeature_common.h" + +int aok; + +int +libzfs_errno(libzfs_handle_t *hdl) +{ + return (hdl->libzfs_error); +} + +const char * +libzfs_error_action(libzfs_handle_t *hdl) +{ + return (hdl->libzfs_action); +} + +const char * +libzfs_error_description(libzfs_handle_t *hdl) +{ + if (hdl->libzfs_desc[0] != '\0') + return (hdl->libzfs_desc); + + switch (hdl->libzfs_error) { + case EZFS_NOMEM: + return (dgettext(TEXT_DOMAIN, "out of memory")); + case EZFS_BADPROP: + return (dgettext(TEXT_DOMAIN, "invalid property value")); + case EZFS_PROPREADONLY: + return (dgettext(TEXT_DOMAIN, "read-only property")); + case EZFS_PROPTYPE: + return (dgettext(TEXT_DOMAIN, "property doesn't apply to " + "datasets of this type")); + case EZFS_PROPNONINHERIT: + return (dgettext(TEXT_DOMAIN, "property cannot be inherited")); + case EZFS_PROPSPACE: + return (dgettext(TEXT_DOMAIN, "invalid quota or reservation")); + case EZFS_BADTYPE: + return (dgettext(TEXT_DOMAIN, "operation not applicable to " + "datasets of 
this type")); + case EZFS_BUSY: + return (dgettext(TEXT_DOMAIN, "pool or dataset is busy")); + case EZFS_EXISTS: + return (dgettext(TEXT_DOMAIN, "pool or dataset exists")); + case EZFS_NOENT: + return (dgettext(TEXT_DOMAIN, "no such pool or dataset")); + case EZFS_BADSTREAM: + return (dgettext(TEXT_DOMAIN, "invalid backup stream")); + case EZFS_DSREADONLY: + return (dgettext(TEXT_DOMAIN, "dataset is read-only")); + case EZFS_VOLTOOBIG: + return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for " + "this system")); + case EZFS_INVALIDNAME: + return (dgettext(TEXT_DOMAIN, "invalid name")); + case EZFS_BADRESTORE: + return (dgettext(TEXT_DOMAIN, "unable to restore to " + "destination")); + case EZFS_BADBACKUP: + return (dgettext(TEXT_DOMAIN, "backup failed")); + case EZFS_BADTARGET: + return (dgettext(TEXT_DOMAIN, "invalid target vdev")); + case EZFS_NODEVICE: + return (dgettext(TEXT_DOMAIN, "no such device in pool")); + case EZFS_BADDEV: + return (dgettext(TEXT_DOMAIN, "invalid device")); + case EZFS_NOREPLICAS: + return (dgettext(TEXT_DOMAIN, "no valid replicas")); + case EZFS_RESILVERING: + return (dgettext(TEXT_DOMAIN, "currently resilvering")); + case EZFS_BADVERSION: + return (dgettext(TEXT_DOMAIN, "unsupported version or " + "feature")); + case EZFS_POOLUNAVAIL: + return (dgettext(TEXT_DOMAIN, "pool is unavailable")); + case EZFS_DEVOVERFLOW: + return (dgettext(TEXT_DOMAIN, "too many devices in one vdev")); + case EZFS_BADPATH: + return (dgettext(TEXT_DOMAIN, "must be an absolute path")); + case EZFS_CROSSTARGET: + return (dgettext(TEXT_DOMAIN, "operation crosses datasets or " + "pools")); + case EZFS_ZONED: + return (dgettext(TEXT_DOMAIN, "dataset in use by local zone")); + case EZFS_MOUNTFAILED: + return (dgettext(TEXT_DOMAIN, "mount failed")); + case EZFS_UMOUNTFAILED: + return (dgettext(TEXT_DOMAIN, "umount failed")); + case EZFS_UNSHARENFSFAILED: + return (dgettext(TEXT_DOMAIN, "unshare(1M) failed")); + case EZFS_SHARENFSFAILED: + return 
(dgettext(TEXT_DOMAIN, "share(1M) failed")); + case EZFS_UNSHARESMBFAILED: + return (dgettext(TEXT_DOMAIN, "smb remove share failed")); + case EZFS_SHARESMBFAILED: + return (dgettext(TEXT_DOMAIN, "smb add share failed")); + case EZFS_PERM: + return (dgettext(TEXT_DOMAIN, "permission denied")); + case EZFS_NOSPC: + return (dgettext(TEXT_DOMAIN, "out of space")); + case EZFS_FAULT: + return (dgettext(TEXT_DOMAIN, "bad address")); + case EZFS_IO: + return (dgettext(TEXT_DOMAIN, "I/O error")); + case EZFS_INTR: + return (dgettext(TEXT_DOMAIN, "signal received")); + case EZFS_ISSPARE: + return (dgettext(TEXT_DOMAIN, "device is reserved as a hot " + "spare")); + case EZFS_INVALCONFIG: + return (dgettext(TEXT_DOMAIN, "invalid vdev configuration")); + case EZFS_RECURSIVE: + return (dgettext(TEXT_DOMAIN, "recursive dataset dependency")); + case EZFS_NOHISTORY: + return (dgettext(TEXT_DOMAIN, "no history available")); + case EZFS_POOLPROPS: + return (dgettext(TEXT_DOMAIN, "failed to retrieve " + "pool properties")); + case EZFS_POOL_NOTSUP: + return (dgettext(TEXT_DOMAIN, "operation not supported " + "on this type of pool")); + case EZFS_POOL_INVALARG: + return (dgettext(TEXT_DOMAIN, "invalid argument for " + "this pool operation")); + case EZFS_NAMETOOLONG: + return (dgettext(TEXT_DOMAIN, "dataset name is too long")); + case EZFS_OPENFAILED: + return (dgettext(TEXT_DOMAIN, "open failed")); + case EZFS_NOCAP: + return (dgettext(TEXT_DOMAIN, + "disk capacity information could not be retrieved")); + case EZFS_LABELFAILED: + return (dgettext(TEXT_DOMAIN, "write of label failed")); + case EZFS_BADWHO: + return (dgettext(TEXT_DOMAIN, "invalid user/group")); + case EZFS_BADPERM: + return (dgettext(TEXT_DOMAIN, "invalid permission")); + case EZFS_BADPERMSET: + return (dgettext(TEXT_DOMAIN, "invalid permission set name")); + case EZFS_NODELEGATION: + return (dgettext(TEXT_DOMAIN, "delegated administration is " + "disabled on pool")); + case EZFS_BADCACHE: + return 
(dgettext(TEXT_DOMAIN, "invalid or missing cache file")); + case EZFS_ISL2CACHE: + return (dgettext(TEXT_DOMAIN, "device is in use as a cache")); + case EZFS_VDEVNOTSUP: + return (dgettext(TEXT_DOMAIN, "vdev specification is not " + "supported")); + case EZFS_NOTSUP: + return (dgettext(TEXT_DOMAIN, "operation not supported " + "on this dataset")); + case EZFS_ACTIVE_SPARE: + return (dgettext(TEXT_DOMAIN, "pool has active shared spare " + "device")); + case EZFS_UNPLAYED_LOGS: + return (dgettext(TEXT_DOMAIN, "log device has unplayed intent " + "logs")); + case EZFS_REFTAG_RELE: + return (dgettext(TEXT_DOMAIN, "no such tag on this dataset")); + case EZFS_REFTAG_HOLD: + return (dgettext(TEXT_DOMAIN, "tag already exists on this " + "dataset")); + case EZFS_TAGTOOLONG: + return (dgettext(TEXT_DOMAIN, "tag too long")); + case EZFS_PIPEFAILED: + return (dgettext(TEXT_DOMAIN, "pipe create failed")); + case EZFS_THREADCREATEFAILED: + return (dgettext(TEXT_DOMAIN, "thread create failed")); + case EZFS_POSTSPLIT_ONLINE: + return (dgettext(TEXT_DOMAIN, "disk was split from this pool " + "into a new one")); + case EZFS_SCRUBBING: + return (dgettext(TEXT_DOMAIN, "currently scrubbing; " + "use 'zpool scrub -s' to cancel current scrub")); + case EZFS_NO_SCRUB: + return (dgettext(TEXT_DOMAIN, "there is no active scrub")); + case EZFS_DIFF: + return (dgettext(TEXT_DOMAIN, "unable to generate diffs")); + case EZFS_DIFFDATA: + return (dgettext(TEXT_DOMAIN, "invalid diff data")); + case EZFS_POOLREADONLY: + return (dgettext(TEXT_DOMAIN, "pool is read-only")); + case EZFS_UNKNOWN: + return (dgettext(TEXT_DOMAIN, "unknown error")); + default: + assert(hdl->libzfs_error == 0); + return (dgettext(TEXT_DOMAIN, "no error")); + } +} + +/*PRINTFLIKE2*/ +void +zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + + (void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc), + fmt, ap); + hdl->libzfs_desc_active = 1; + + va_end(ap); +} + +static void +zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) +{ + (void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action), + fmt, ap); + hdl->libzfs_error = error; + + if (hdl->libzfs_desc_active) + hdl->libzfs_desc_active = 0; + else + hdl->libzfs_desc[0] = '\0'; + + if (hdl->libzfs_printerr) { + if (error == EZFS_UNKNOWN) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal " + "error: %s\n"), libzfs_error_description(hdl)); + abort(); + } + + (void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action, + libzfs_error_description(hdl)); + if (error == EZFS_NOMEM) + exit(1); + } +} + +int +zfs_error(libzfs_handle_t *hdl, int error, const char *msg) +{ + return (zfs_error_fmt(hdl, error, "%s", msg)); +} + +/*PRINTFLIKE3*/ +int +zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + + zfs_verror(hdl, error, fmt, ap); + + va_end(ap); + + return (-1); +} + +static int +zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt, + va_list ap) +{ + switch (error) { + case EPERM: + case EACCES: + zfs_verror(hdl, EZFS_PERM, fmt, ap); + return (-1); + + case ECANCELED: + zfs_verror(hdl, EZFS_NODELEGATION, fmt, ap); + return (-1); + + case EIO: + zfs_verror(hdl, EZFS_IO, fmt, ap); + return (-1); + + case EFAULT: + zfs_verror(hdl, EZFS_FAULT, fmt, ap); + return (-1); + + case EINTR: + zfs_verror(hdl, EZFS_INTR, fmt, ap); + return (-1); + } + + return (0); +} + +int +zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg) +{ + return (zfs_standard_error_fmt(hdl, error, "%s", msg)); +} + +/*PRINTFLIKE3*/ +int +zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + + if (zfs_common_error(hdl, error, fmt, ap) != 0) { + va_end(ap); + return (-1); + } + + switch (error) { + case ENXIO: + case ENODEV: + case EPIPE: + zfs_verror(hdl, EZFS_IO, fmt, ap); + break; + + case ENOENT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset does not exist")); + zfs_verror(hdl, EZFS_NOENT, fmt, ap); + break; + + case ENOSPC: + case EDQUOT: + zfs_verror(hdl, EZFS_NOSPC, fmt, ap); + va_end(ap); + return (-1); + + case EEXIST: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset already exists")); + zfs_verror(hdl, EZFS_EXISTS, fmt, ap); + break; + + case EBUSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset is busy")); + zfs_verror(hdl, EZFS_BUSY, fmt, ap); + break; + case EROFS: + zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); + break; + case ENAMETOOLONG: + zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap); + break; + case ENOTSUP: + zfs_verror(hdl, EZFS_BADVERSION, fmt, ap); + break; + case EAGAIN: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool I/O is currently suspended")); + zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); + break; + default: + zfs_error_aux(hdl, strerror(error)); + zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); + break; + } + + va_end(ap); + return (-1); +} + +int +zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg) +{ + return (zpool_standard_error_fmt(hdl, error, "%s", msg)); +} + +/*PRINTFLIKE3*/ +int +zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + + if (zfs_common_error(hdl, error, fmt, ap) != 0) { + va_end(ap); + return (-1); + } + + switch (error) { + case ENODEV: + zfs_verror(hdl, EZFS_NODEVICE, fmt, ap); + break; + + case ENOENT: + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "no such pool or dataset")); + zfs_verror(hdl, EZFS_NOENT, fmt, ap); + break; + + case EEXIST: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool already exists")); + zfs_verror(hdl, EZFS_EXISTS, fmt, ap); + break; + + case EBUSY: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy")); + zfs_verror(hdl, EZFS_BUSY, fmt, ap); + break; + + case ENXIO: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is currently unavailable")); + zfs_verror(hdl, EZFS_BADDEV, fmt, ap); + break; + + case ENAMETOOLONG: + zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap); + break; + + case ENOTSUP: + zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap); + break; + + case EINVAL: + zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap); + break; + + case ENOSPC: + case EDQUOT: + zfs_verror(hdl, EZFS_NOSPC, fmt, ap); + va_end(ap); + return (-1); + + case EAGAIN: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool I/O is currently suspended")); + zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); + break; + + case EROFS: + zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); + break; + + default: + zfs_error_aux(hdl, strerror(error)); + zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); + } + + va_end(ap); + return (-1); +} + +/* + * Display an out of memory error message and abort the current program. + */ +int +no_memory(libzfs_handle_t *hdl) +{ + return (zfs_error(hdl, EZFS_NOMEM, "internal error")); +} + +/* + * A safe form of malloc() which will die if the allocation fails. + */ +void * +zfs_alloc(libzfs_handle_t *hdl, size_t size) +{ + void *data; + + if ((data = calloc(1, size)) == NULL) + (void) no_memory(hdl); + + return (data); +} + +/* + * A safe form of asprintf() which will die if the allocation fails. 
+ */
+/*PRINTFLIKE2*/
+char *
+zfs_asprintf(libzfs_handle_t *hdl, const char *fmt, ...)
+{
+	va_list ap;
+	char *ret = NULL;
+	int err;
+
+	va_start(ap, fmt);
+	err = vasprintf(&ret, fmt, ap);
+	va_end(ap);
+
+	if (err < 0) {
+		/*
+		 * Not every vasprintf() implementation defines the contents
+		 * of 'ret' after a failure, so force it to NULL before it can
+		 * reach a caller (no_memory() returns to us when error
+		 * printing is disabled on the handle).
+		 */
+		ret = NULL;
+		(void) no_memory(hdl);
+	}
+
+	return (ret);
+}
+
+/*
+ * A safe form of realloc(), which also zeroes newly allocated space.
+ *
+ * 'oldsize' is the size of the existing allocation and 'newsize' the size
+ * being requested; only the bytes between the two are cleared.  On failure
+ * EZFS_NOMEM is reported through no_memory() and NULL is returned.
+ */
+void *
+zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize)
+{
+	void *ret;
+
+	if ((ret = realloc(ptr, newsize)) == NULL) {
+		(void) no_memory(hdl);
+		return (NULL);
+	}
+
+	/*
+	 * Only clear the tail when the buffer actually grew; for a shrinking
+	 * request 'newsize - oldsize' would wrap around to a huge size_t.
+	 */
+	if (newsize > oldsize)
+		bzero((char *)ret + oldsize, newsize - oldsize);
+	return (ret);
+}
+
+/*
+ * A safe form of strdup() which will die if the allocation fails.
+ * Returns the copy, or NULL after reporting EZFS_NOMEM via no_memory().
+ */
+char *
+zfs_strdup(libzfs_handle_t *hdl, const char *str)
+{
+	char *ret;
+
+	if ((ret = strdup(str)) == NULL)
+		(void) no_memory(hdl);
+
+	return (ret);
+}
+
+/*
+ * Convert a number to an appropriately human-readable output.
+ *
+ * 'num' is repeatedly divided by 1024 to pick a unit suffix from
+ * " KMGTPE"; the formatted text is written to 'buf' (at most 'buflen'
+ * bytes, as bounded by snprintf()).
+ */
+void
+zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+	uint64_t n = num;
+	int index = 0;
+	char u;
+
+	while (n >= 1024) {
+		n /= 1024;
+		index++;
+	}
+
+	u = " KMGTPE"[index];
+
+	if (index == 0) {
+		/* "%llu" expects unsigned long long, not uint64_t. */
+		(void) snprintf(buf, buflen, "%llu", (unsigned long long)n);
+	} else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
+		/*
+		 * If this is an even multiple of the base, always display
+		 * without any decimal precision.
+		 */
+		(void) snprintf(buf, buflen, "%llu%c", (unsigned long long)n,
+		    u);
+	} else {
+		/*
+		 * We want to choose a precision that reflects the best choice
+		 * for fitting in 5 characters.  This can get rather tricky when
+		 * we have numbers that are very close to an order of magnitude.
+		 * For example, when displaying 10239 (which is really 9.999K),
+		 * we want only a single place of precision for 10.0K.  We could
+		 * develop some complex heuristics for this, but it's much
+		 * easier just to try each combination in turn.
+		 */
+		int i;
+		for (i = 2; i >= 0; i--) {
+			if (snprintf(buf, buflen, "%.*f%c", i,
+			    (double)num / (1ULL << 10 * index), u) <= 5)
+				break;
+		}
+	}
+}
+
+/*
+ * Select whether errors recorded on this handle are also printed.
+ */
+void
+libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
+{
+	hdl->libzfs_printerr = printerr;
+}
+
+/*
+ * Make sure the "zfs" kernel module is available, loading it if modfind()
+ * cannot find it.  Returns 0 on success and -1 on failure; a failure whose
+ * errno is EEXIST is treated as success.
+ */
+static int
+libzfs_load(void)
+{
+	if (modfind("zfs") < 0) {
+		/* Not present in kernel, try loading it. */
+		if (kldload("zfs") < 0 || modfind("zfs") < 0) {
+			if (errno != EEXIST)
+				return (-1);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Allocate and initialize a library handle.  Every failure path releases
+ * whatever was acquired so far and returns NULL.
+ */
+libzfs_handle_t *
+libzfs_init(void)
+{
+	libzfs_handle_t *hdl;
+
+	if ((hdl = calloc(1, sizeof (libzfs_handle_t))) == NULL) {
+		return (NULL);
+	}
+
+	if (libzfs_load() < 0) {
+		free(hdl);
+		return (NULL);
+	}
+
+	if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+		free(hdl);
+		return (NULL);
+	}
+
+	if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
+		(void) close(hdl->libzfs_fd);
+		free(hdl);
+		return (NULL);
+	}
+
+	/* The share table is optional: a failed fopen() is tolerated. */
+	hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r");
+
+	if (libzfs_core_init() != 0) {
+		(void) close(hdl->libzfs_fd);
+		(void) fclose(hdl->libzfs_mnttab);
+		/* fclose(NULL) is undefined; the share table may be absent. */
+		if (hdl->libzfs_sharetab != NULL)
+			(void) fclose(hdl->libzfs_sharetab);
+		free(hdl);
+		return (NULL);
+	}
+
+	zfs_prop_init();
+	zpool_prop_init();
+	zpool_feature_init();
+	libzfs_mnttab_init(hdl);
+
+	return (hdl);
+}
+
+/*
+ * Release everything held by a handle obtained from libzfs_init() and free
+ * the handle itself.
+ */
+void
+libzfs_fini(libzfs_handle_t *hdl)
+{
+	(void) close(hdl->libzfs_fd);
+	if (hdl->libzfs_mnttab)
+		(void) fclose(hdl->libzfs_mnttab);
+	if (hdl->libzfs_sharetab)
+		(void) fclose(hdl->libzfs_sharetab);
+	zfs_uninit_libshare(hdl);
+	zpool_free_handles(hdl);
+#ifdef sun
+	libzfs_fru_clear(hdl, B_TRUE);
+#endif
+	namespace_clear(hdl);
+	libzfs_mnttab_fini(hdl);
+	libzfs_core_fini();
+	free(hdl);
+}
+
+/* Return the library handle a pool handle belongs to. */
+libzfs_handle_t *
+zpool_get_handle(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
+/* Return the library handle a dataset handle belongs to. */
+libzfs_handle_t *
+zfs_get_handle(zfs_handle_t *zhp)
+{
+	return (zhp->zfs_hdl);
+}
+
+/* Return the pool handle stored in a dataset handle. */
+zpool_handle_t *
+zfs_get_pool_handle(const zfs_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
+/*
+ * Given a name,
determine whether or not it's a valid path
+ * (starts with '/' or "./").  If so, walk the mnttab trying
+ * to match the device number.  If not, treat the path as an
+ * fs/vol/snap name.
+ */
+zfs_handle_t *
+zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
+{
+	struct stat64 statbuf;
+	struct extmnttab entry;
+	int ret;
+	int looks_like_path;
+
+	looks_like_path = (path[0] == '/' ||
+	    strncmp(path, "./", sizeof ("./") - 1) == 0);
+
+	/* Anything else is taken to be a dataset name of type 'argtype'. */
+	if (!looks_like_path)
+		return (zfs_open(hdl, path, argtype));
+
+	if (stat64(path, &statbuf) != 0) {
+		(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
+		return (NULL);
+	}
+
+#ifdef sun
+	/* Scan the mount table for the entry whose device matches. */
+	rewind(hdl->libzfs_mnttab);
+	for (;;) {
+		ret = getextmntent(hdl->libzfs_mnttab, &entry, 0);
+		if (ret != 0)
+			break;
+		if (makedevice(entry.mnt_major, entry.mnt_minor) ==
+		    statbuf.st_dev)
+			break;
+	}
+#else
+	{
+		struct statfs sfs;
+
+		/* Ask the kernel directly and convert the answer. */
+		if ((ret = statfs(path, &sfs)) != 0) {
+			(void) fprintf(stderr, "%s: %s\n", path,
+			    strerror(errno));
+		} else {
+			statfs2mnttab(&sfs, &entry);
+		}
+	}
+#endif	/* sun */
+	if (ret != 0)
+		return (NULL);
+
+	if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
+		(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
+		    path);
+		return (NULL);
+	}
+
+	return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM));
+}
+
+/*
+ * Prepare the zc_nvlist_dst member so that an ioctl() can hand back an
+ * nvlist.  A 'len' of zero selects a 16K buffer.  Returns 0 on success and
+ * -1 if the buffer could not be allocated.
+ */
+int
+zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
+{
+	void *dst;
+
+	zc->zc_nvlist_dst_size = (len != 0) ? len : 16 * 1024;
+
+	dst = zfs_alloc(hdl, zc->zc_nvlist_dst_size);
+	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)dst;
+
+	return (dst == NULL ? -1 : 0);
+}
+
+/*
+ * Called when an ioctl() which returns an nvlist fails with ENOMEM.  This will
+ * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was
+ * filled in by the kernel to indicate the actual required size.
+ */
+int
+zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+	void *dst;
+
+	/* Drop the undersized buffer before asking for the bigger one. */
+	free((void *)(uintptr_t)zc->zc_nvlist_dst);
+
+	dst = zfs_alloc(hdl, zc->zc_nvlist_dst_size);
+	zc->zc_nvlist_dst = (uint64_t)(uintptr_t)dst;
+
+	return (dst == NULL ? -1 : 0);
+}
+
+/*
+ * Release the conf, src and dst nvlist buffers referenced by the command
+ * structure.
+ */
+void
+zcmd_free_nvlists(zfs_cmd_t *zc)
+{
+	void *conf = (void *)(uintptr_t)zc->zc_nvlist_conf;
+	void *src = (void *)(uintptr_t)zc->zc_nvlist_src;
+	void *dst = (void *)(uintptr_t)zc->zc_nvlist_dst;
+
+	free(conf);
+	free(src);
+	free(dst);
+}
+
+/*
+ * Pack 'nvl' with native encoding into a freshly allocated buffer and
+ * publish the buffer address and length through 'outnv' and 'outlen'.
+ * Returns 0 on success, or -1 if the buffer could not be allocated.
+ */
+static int
+zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen,
+    nvlist_t *nvl)
+{
+	size_t nbytes;
+	char *blob;
+
+	verify(nvlist_size(nvl, &nbytes, NV_ENCODE_NATIVE) == 0);
+
+	blob = zfs_alloc(hdl, nbytes);
+	if (blob == NULL)
+		return (-1);
+
+	verify(nvlist_pack(nvl, &blob, &nbytes, NV_ENCODE_NATIVE, 0) == 0);
+
+	*outlen = nbytes;
+	*outnv = (uint64_t)(uintptr_t)blob;
+
+	return (0);
+}
+
+/* Pack 'nvl' into the command's zc_nvlist_conf buffer. */
+int
+zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	uint64_t *bufp = &zc->zc_nvlist_conf;
+	uint64_t *lenp = &zc->zc_nvlist_conf_size;
+
+	return (zcmd_write_nvlist_com(hdl, bufp, lenp, nvl));
+}
+
+/* Pack 'nvl' into the command's zc_nvlist_src buffer. */
+int
+zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl)
+{
+	uint64_t *bufp = &zc->zc_nvlist_src;
+	uint64_t *lenp = &zc->zc_nvlist_src_size;
+
+	return (zcmd_write_nvlist_com(hdl, bufp, lenp, nvl));
+}
+
+/*
+ * Unpacks an nvlist from the ZFS ioctl command structure.
+ */
+int
+zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
+{
+	/* Any unpack failure is reported to the caller as EZFS_NOMEM. */
+	if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst,
+	    zc->zc_nvlist_dst_size, nvlp, 0) != 0)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+/* Issue 'request' with argument 'zc' on the handle's ZFS descriptor. */
+int
+zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
+{
+	return (ioctl(hdl->libzfs_fd, request, zc));
+}
+
+/*
+ * ================================================================
+ * API shared by zfs and zpool property management
+ * ================================================================
+ */
+
+/*
+ * Compute the column widths for a property listing and print the header
+ * line.  Clears cb_first so the headers are emitted once; nothing is
+ * computed or printed in scripted mode.
+ */
+static void
+zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
+{
+	zprop_list_t *pl = cbp->cb_proplist;
+	int i;
+	char *title;
+	size_t len;
+
+	cbp->cb_first = B_FALSE;
+	if (cbp->cb_scripted)
+		return;
+
+	/*
+	 * Start with the length of the column headers.
+	 */
+	cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME"));
+	cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN,
+	    "PROPERTY"));
+	cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
+	    "VALUE"));
+	cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN,
+	    "RECEIVED"));
+	cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
+	    "SOURCE"));
+
+	/* first property is always NAME */
+	assert(cbp->cb_proplist->pl_prop ==
+	    ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME));
+
+	/*
+	 * Go through and calculate the widths for each column.  For the
+	 * 'source' column, we kludge it up by taking the worst-case scenario of
+	 * inheriting from the longest name.  This is acceptable because in the
+	 * majority of cases 'SOURCE' is the last column displayed, and we don't
+	 * use the width anyway.  Note that the 'VALUE' column can be oversized,
+	 * if the name of the property is much longer than any values we find.
+	 */
+	for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
+		/*
+		 * 'PROPERTY' column
+		 */
+		if (pl->pl_prop != ZPROP_INVAL) {
+			const char *propname = (type == ZFS_TYPE_POOL) ?
+			    zpool_prop_to_name(pl->pl_prop) :
+			    zfs_prop_to_name(pl->pl_prop);
+
+			len = strlen(propname);
+			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+		} else {
+			len = strlen(pl->pl_user_prop);
+			if (len > cbp->cb_colwidths[GET_COL_PROPERTY])
+				cbp->cb_colwidths[GET_COL_PROPERTY] = len;
+		}
+
+		/*
+		 * 'VALUE' column.  The first property is always the 'name'
+		 * property that was tacked on either by /sbin/zfs's
+		 * zfs_do_get() or when calling zprop_expand_list(), so we
+		 * ignore its width.  If the user specified the name property
+		 * to display, then it will be later in the list in any case.
+		 */
+		if (pl != cbp->cb_proplist &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
+			cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
+
+		/* 'RECEIVED' column. */
+		if (pl != cbp->cb_proplist &&
+		    pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD])
+			cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width;
+
+		/*
+		 * 'NAME' and 'SOURCE' columns
+		 */
+		if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME :
+		    ZFS_PROP_NAME) &&
+		    pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) {
+			cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width;
+			cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width +
+			    strlen(dgettext(TEXT_DOMAIN, "inherited from"));
+		}
+	}
+
+	/*
+	 * Now go through and print the headers.
+	 */
+	for (i = 0; i < ZFS_GET_NCOLS; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			title = dgettext(TEXT_DOMAIN, "NAME");
+			break;
+		case GET_COL_PROPERTY:
+			title = dgettext(TEXT_DOMAIN, "PROPERTY");
+			break;
+		case GET_COL_VALUE:
+			title = dgettext(TEXT_DOMAIN, "VALUE");
+			break;
+		case GET_COL_RECVD:
+			title = dgettext(TEXT_DOMAIN, "RECEIVED");
+			break;
+		case GET_COL_SOURCE:
+			title = dgettext(TEXT_DOMAIN, "SOURCE");
+			break;
+		default:
+			title = NULL;
+		}
+
+		if (title != NULL) {
+			if (i == (ZFS_GET_NCOLS - 1) ||
+			    cbp->cb_columns[i + 1] == GET_COL_NONE)
+				(void) printf("%s", title);
+			else
+				(void) printf("%-*s ",
+				    cbp->cb_colwidths[cbp->cb_columns[i]],
+				    title);
+		}
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Display a single line of output, according to the settings in the callback
+ * structure.
+ *
+ * Nothing is printed when 'sourcetype' is not among cbp->cb_sources.  The
+ * header line is printed first if it has not been emitted yet.  'source' is
+ * only used for inherited properties, and a NULL 'recvd_value' is shown
+ * as "-".
+ */
+void
+zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
+    const char *propname, const char *value, zprop_source_t sourcetype,
+    const char *source, const char *recvd_value)
+{
+	int i;
+	const char *str;
+	char buf[128];
+
+	/*
+	 * Ignore those source types that the user has chosen to ignore.
+	 */
+	if ((sourcetype & cbp->cb_sources) == 0)
+		return;
+
+	if (cbp->cb_first)
+		zprop_print_headers(cbp, cbp->cb_type);
+
+	for (i = 0; i < ZFS_GET_NCOLS; i++) {
+		switch (cbp->cb_columns[i]) {
+		case GET_COL_NAME:
+			str = name;
+			break;
+
+		case GET_COL_PROPERTY:
+			str = propname;
+			break;
+
+		case GET_COL_VALUE:
+			str = value;
+			break;
+
+		case GET_COL_SOURCE:
+			switch (sourcetype) {
+			case ZPROP_SRC_NONE:
+				str = "-";
+				break;
+
+			case ZPROP_SRC_DEFAULT:
+				str = "default";
+				break;
+
+			case ZPROP_SRC_LOCAL:
+				str = "local";
+				break;
+
+			case ZPROP_SRC_TEMPORARY:
+				str = "temporary";
+				break;
+
+			case ZPROP_SRC_INHERITED:
+				(void) snprintf(buf, sizeof (buf),
+				    "inherited from %s", source);
+				str = buf;
+				break;
+			case ZPROP_SRC_RECEIVED:
+				str = "received";
+				break;
+
+			default:
+				/*
+				 * Never print an uninitialized pointer for a
+				 * source value not listed above.
+				 */
+				str = "-";
+				break;
+			}
+			break;
+
+		case GET_COL_RECVD:
+			str = (recvd_value == NULL ? "-" : recvd_value);
+			break;
+
+		default:
+			continue;
+		}
+
+		/*
+		 * Same last-column test as zprop_print_headers(): check the
+		 * index before looking at the following slot.
+		 */
+		if (i == (ZFS_GET_NCOLS - 1) ||
+		    cbp->cb_columns[i + 1] == GET_COL_NONE)
+			(void) printf("%s", str);
+		else if (cbp->cb_scripted)
+			(void) printf("%s\t", str);
+		else
+			(void) printf("%-*s ",
+			    cbp->cb_colwidths[cbp->cb_columns[i]],
+			    str);
+	}
+
+	(void) printf("\n");
+}
+
+/*
+ * Given a numeric suffix, convert the value into a number of bits that the
+ * resulting value must be shifted.
+ *
+ * Returns 0 for an empty suffix, 10 times the position of the suffix letter
+ * in "BKMGTPEZ" otherwise, or -1 for an unrecognized suffix.  'hdl' may be
+ * NULL, in which case no extended error message is recorded.
+ */
+static int
+str2shift(libzfs_handle_t *hdl, const char *buf)
+{
+	const char *ends = "BKMGTPEZ";
+	int nends = (int)strlen(ends);
+	int i;
+
+	if (buf[0] == '\0')
+		return (0);
+	for (i = 0; i < nends; i++) {
+		/* toupper() is only defined for unsigned char values. */
+		if (toupper((unsigned char)buf[0]) == ends[i])
+			break;
+	}
+	if (i == nends) {
+		/* zfs_nicestrtonum() allows a NULL handle; so must we. */
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid numeric suffix '%s'"), buf);
+		return (-1);
+	}
+
+	/*
+	 * We want to allow trailing 'b' characters for 'GB' or 'Mb'.  But don't
+	 * allow 'BB' - that's just weird.
+	 */
+	if (buf[1] == '\0' ||
+	    (toupper((unsigned char)buf[1]) == 'B' && buf[2] == '\0' &&
+	    toupper((unsigned char)buf[0]) != 'B'))
+		return (10 * i);
+
+	if (hdl)
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid numeric suffix '%s'"), buf);
+	return (-1);
+}
+
+/*
+ * Convert a string of the form '100G' into a real number.  Used when setting
+ * properties or creating a volume.  On failure -1 is returned and, when
+ * 'hdl' is non-NULL, an extended error message is recorded on the handle
+ * for the caller to use.  On success 0 is returned and the result is
+ * stored in '*num'.
+ */
+int
+zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
+{
+	char *end;
+	int shift;
+
+	*num = 0;
+
+	/* Check to see if this looks like a number.  */
+	if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "bad numeric value '%s'"), value);
+		return (-1);
+	}
+
+	/* Rely on strtoull() to process the numeric portion.  */
+	errno = 0;
+	*num = strtoull(value, &end, 10);
+
+	/*
+	 * Check for ERANGE, which indicates that the value is too large to fit
+	 * in a 64-bit value.
+	 */
+	if (errno == ERANGE) {
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "numeric value is too large"));
+		return (-1);
+	}
+
+	/*
+	 * If we have a decimal value, then do the computation with floating
+	 * point arithmetic.  Otherwise, use standard arithmetic.
+	 */
+	if (*end == '.') {
+		double fval = strtod(value, &end);
+
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		fval *= pow(2, shift);
+
+		/*
+		 * UINT64_MAX is not representable as a double and rounds up
+		 * to 2^64, so the comparison has to be '>=': a value of
+		 * exactly 2^64 cannot be converted to uint64_t.
+		 */
+		if (fval >= (double)UINT64_MAX) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num = (uint64_t)fval;
+	} else {
+		if ((shift = str2shift(hdl, end)) == -1)
+			return (-1);
+
+		/* Check for overflow */
+		if (shift >= 64 || (*num << shift) >> shift != *num) {
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
+			return (-1);
+		}
+
+		*num <<= shift;
+	}
+
+	return (0);
+}
+
+/*
+ * Given a propname=value nvpair to set, parse any numeric properties
+ * (index, boolean, etc) if they are specified as strings and add the
+ * resulting nvpair to the returned nvlist.
+ *
+ * At the DSL layer, all properties are either 64-bit numbers or strings.
+ * We want the user to be able to ignore this fact and specify properties
+ * as native values (numbers, for example) or as strings (to simplify
+ * command line utilities).  This also handles converting index types
+ * (compression, checksum, etc) from strings to their on-disk index.
+ */
+int
+zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
+    zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp,
+    const char *errbuf)
+{
+	data_type_t datatype = nvpair_type(elem);
+	zprop_type_t proptype;
+	const char *propname;
+	char *value;
+	boolean_t isnone = B_FALSE;
+
+	if (type == ZFS_TYPE_POOL) {
+		proptype = zpool_prop_get_type(prop);
+		propname = zpool_prop_to_name(prop);
+	} else {
+		proptype = zfs_prop_get_type(prop);
+		propname = zfs_prop_to_name(prop);
+	}
+
+	/*
+	 * Convert any properties to the internal DSL value types.
+	 * Exactly one of '*svalp' / '*ivalp' carries the result: a non-NULL
+	 * '*svalp' means a string value, otherwise '*ivalp' is used.
+	 */
+	*svalp = NULL;
+	*ivalp = 0;
+
+	switch (proptype) {
+	case PROP_TYPE_STRING:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+		(void) nvpair_value_string(elem, svalp);
+		if (strlen(*svalp) >= ZFS_MAXPROPLEN) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' is too long"), nvpair_name(elem));
+			goto error;
+		}
+		break;
+
+	case PROP_TYPE_NUMBER:
+		if (datatype == DATA_TYPE_STRING) {
+			(void) nvpair_value_string(elem, &value);
+			if (strcmp(value, "none") == 0) {
+				isnone = B_TRUE;
+			} else if (zfs_nicestrtonum(hdl, value, ivalp)
+			    != 0) {
+				goto error;
+			}
+		} else if (datatype == DATA_TYPE_UINT64) {
+			(void) nvpair_value_uint64(elem, ivalp);
+		} else {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a number"), nvpair_name(elem));
+			goto error;
+		}
+
+		/*
+		 * Quota special: force 'none' and don't allow 0.
+		 */
+		if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone &&
+		    (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "use 'none' to disable quota/refquota"));
+			goto error;
+		}
+
+		/*
+		 * Special handling for "*_limit=none". In this case it's not
+		 * 0 but UINT64_MAX.
+		 */
+		if ((type & ZFS_TYPE_DATASET) && isnone &&
+		    (prop == ZFS_PROP_FILESYSTEM_LIMIT ||
+		    prop == ZFS_PROP_SNAPSHOT_LIMIT)) {
+			*ivalp = UINT64_MAX;
+		}
+		break;
+
+	case PROP_TYPE_INDEX:
+		if (datatype != DATA_TYPE_STRING) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be a string"), nvpair_name(elem));
+			goto error;
+		}
+
+		(void) nvpair_value_string(elem, &value);
+
+		if (zprop_string_to_index(prop, value, ivalp, type) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' must be one of '%s'"), propname,
+			    zprop_values(prop, type));
+			goto error;
+		}
+		break;
+
+	default:
+		abort();
+	}
+
+	/*
+	 * Add the result to our return set of properties.
+	 */
+	if (*svalp != NULL) {
+		if (nvlist_add_string(ret, propname, *svalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	} else {
+		if (nvlist_add_uint64(ret, propname, *ivalp) != 0) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+	}
+
+	return (0);
+error:
+	/* Every validation failure above is reported as EZFS_BADPROP. */
+	(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+	return (-1);
+}
+
+/*
+ * Allocate a property list entry for 'propname' and store it in '*listp'.
+ * Returns 0 on success; a non-zero value (after recording an error on
+ * 'hdl') if the name is not acceptable for 'type' or allocation fails.
+ */
+static int
+addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	int prop;
+	zprop_list_t *entry;
+
+	prop = zprop_name_to_prop(propname, type);
+
+	if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type))
+		prop = ZPROP_INVAL;
+
+	/*
+	 * When no property table entry can be found, return failure if
+	 * this is a pool property or if this isn't a user-defined
+	 * dataset property.
+	 */
+	if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
+	    !zpool_prop_feature(propname) &&
+	    !zpool_prop_unsupported(propname)) ||
+	    (type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) &&
+	    !zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid property '%s'"), propname);
+		return (zfs_error(hdl, EZFS_BADPROP,
+		    dgettext(TEXT_DOMAIN, "bad property list")));
+	}
+
+	if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL)
+		return (-1);
+
+	entry->pl_prop = prop;
+	if (prop == ZPROP_INVAL) {
+		/* No table entry: keep a private copy of the name. */
+		if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) ==
+		    NULL) {
+			free(entry);
+			return (-1);
+		}
+		entry->pl_width = strlen(propname);
+	} else {
+		entry->pl_width = zprop_width(prop, &entry->pl_fixed,
+		    type);
+	}
+
+	*listp = entry;
+
+	return (0);
+}
+
+/*
+ * Given a comma-separated list of properties, construct a property list
+ * containing both user-defined and native properties.  This function will
+ * return a NULL list if 'all' is specified, which can later be expanded
+ * by zprop_expand_list().
+ *
+ * Note that 'props' is modified in place: each name is NUL-terminated
+ * where its separator was, and the separator is not restored.
+ */
+int
+zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp,
+    zfs_type_t type)
+{
+	*listp = NULL;
+
+	/*
+	 * If 'all' is specified, return a NULL list.
+	 */
+	if (strcmp(props, "all") == 0)
+		return (0);
+
+	/*
+	 * If no props were specified, return an error.
+	 */
+	if (props[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no properties specified"));
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad property list")));
+	}
+
+	/*
+	 * It would be nice to use getsubopt() here, but the inclusion of column
+	 * aliases makes this more effort than it's worth.
+	 */
+	while (*props != '\0') {
+		size_t len;
+		char *p;
+		char c;
+
+		if ((p = strchr(props, ',')) == NULL) {
+			len = strlen(props);
+			p = props + len;
+		} else {
+			len = p - props;
+		}
+
+		/*
+		 * Check for empty options.
+		 */
+		if (len == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "empty property name"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad property list")));
+		}
+
+		/*
+		 * Check all regular property names.
+		 */
+		c = props[len];
+		props[len] = '\0';
+
+		if (strcmp(props, "space") == 0) {
+			/* "space" is shorthand for this fixed set of names. */
+			static char *spaceprops[] = {
+				"name", "avail", "used", "usedbysnapshots",
+				"usedbydataset", "usedbyrefreservation",
+				"usedbychildren", NULL
+			};
+			int i;
+
+			for (i = 0; spaceprops[i]; i++) {
+				if (addlist(hdl, spaceprops[i], listp, type))
+					return (-1);
+				listp = &(*listp)->pl_next;
+			}
+		} else {
+			if (addlist(hdl, props, listp, type))
+				return (-1);
+			listp = &(*listp)->pl_next;
+		}
+
+		/* Step past the separator, if this name was followed by one. */
+		props = p;
+		if (c == ',')
+			props++;
+	}
+
+	return (0);
+}
+
+/*
+ * Free every entry of a property list, including the name copies kept in
+ * pl_user_prop.
+ */
+void
+zprop_free_list(zprop_list_t *pl)
+{
+	zprop_list_t *next;
+
+	while (pl != NULL) {
+		next = pl->pl_next;
+		free(pl->pl_user_prop);
+		free(pl);
+		pl = next;
+	}
+}
+
+/* State handed to zprop_expand_list_cb() through its 'cb' argument. */
+typedef struct expand_data {
+	zprop_list_t **last;	/* link where the next entry is stored */
+	libzfs_handle_t *hdl;	/* handle used for allocation */
+	zfs_type_t type;	/* type passed on to zprop_width() */
+} expand_data_t;
+
+/*
+ * Iteration callback: append an entry for 'prop' at the tail tracked by the
+ * expand_data_t in 'cb'.  Returns ZPROP_CONT, or ZPROP_INVAL if the entry
+ * cannot be allocated.
+ */
+int
+zprop_expand_list_cb(int prop, void *cb)
+{
+	zprop_list_t *entry;
+	expand_data_t *edp = cb;
+
+	if ((entry = zfs_alloc(edp->hdl, sizeof (zprop_list_t))) == NULL)
+		return (ZPROP_INVAL);
+
+	entry->pl_prop = prop;
+	entry->pl_width = zprop_width(prop, &entry->pl_fixed, edp->type);
+	entry->pl_all = B_TRUE;
+
+	*(edp->last) = entry;
+	edp->last = &entry->pl_next;
+
+	return (ZPROP_CONT);
+}
+
+/*
+ * Expand a NULL ('all') property list into one entry per property, with the
+ * 'name' property placed first.  A non-NULL list is left untouched.
+ * Returns 0 on success and -1 on failure.
+ */
+int
+zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type)
+{
+	zprop_list_t *entry;
+	zprop_list_t **last;
+	expand_data_t exp;
+
+	if (*plp == NULL) {
+		/*
+		 * If this is the very first time we've been called for an 'all'
+		 * specification, expand the list to include all native
+		 * properties.
+		 */
+		last = plp;
+
+		exp.last = last;
+		exp.hdl = hdl;
+		exp.type = type;
+
+		if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE,
+		    B_FALSE, type) == ZPROP_INVAL)
+			return (-1);
+
+		/*
+		 * Add 'name' to the beginning of the list, which is handled
+		 * specially.
+		 */
+		if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL)
+			return (-1);
+
+		entry->pl_prop = (type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME :
+		    ZFS_PROP_NAME;
+		entry->pl_width = zprop_width(entry->pl_prop,
+		    &entry->pl_fixed, type);
+		entry->pl_all = B_TRUE;
+		entry->pl_next = *plp;
+		*plp = entry;
+	}
+	return (0);
+}
+
+/* Thin public wrapper around zprop_iter_common(). */
+int
+zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered,
+    zfs_type_t type)
+{
+	return (zprop_iter_common(func, cb, show_all, ordered, type));
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
new file mode 100644
index 0000000..52bd580
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
@@ -0,0 +1,776 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
+ */
+
+/*
+ * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
+ * It has the following characteristics:
+ *
+ * - Thread Safe. libzfs_core is accessible concurrently from multiple
+ * threads. This is accomplished primarily by avoiding global data
+ * (e.g. caching).
Since it's thread-safe, there is no reason for a
+ * process to have multiple libzfs "instances". Therefore, we store
+ * our few pieces of data (e.g. the file descriptor) in global
+ * variables. The fd is reference-counted so that the libzfs_core
+ * library can be "initialized" multiple times (e.g. by different
+ * consumers within the same process).
+ *
+ * - Committed Interface. The libzfs_core interface will be committed,
+ * therefore consumers can compile against it and be confident that
+ * their code will continue to work on future releases of this code.
+ * Currently, the interface is Evolving (not Committed), but we intend
+ * to commit to it once it is more complete and we determine that it
+ * meets the needs of all consumers.
+ *
+ * - Programmatic Error Handling. libzfs_core communicates errors with
+ * defined error numbers, and doesn't print anything to stdout/stderr.
+ *
+ * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
+ * to/from the kernel ioctls. There is generally a 1:1 correspondence
+ * between libzfs_core functions and ioctls to /dev/zfs.
+ *
+ * - Clear Atomicity. Because libzfs_core functions are generally 1:1
+ * with kernel ioctls, and kernel ioctls are generally atomic, each
+ * libzfs_core function is atomic. For example, creating multiple
+ * snapshots with a single call to lzc_snapshot() is atomic -- it
+ * can't fail with only some of the requested snapshots created, even
+ * in the event of power loss or system crash.
+ *
+ * - Continued libzfs Support. Some higher-level operations (e.g.
+ * support for "zfs send -R") are too complicated to fit the scope of
+ * libzfs_core. This functionality will continue to live in libzfs.
+ * Where appropriate, libzfs will use the underlying atomic operations
+ * of libzfs_core. For example, libzfs may implement "zfs send -R |
+ * zfs receive" by using individual "send one snapshot", rename,
+ * destroy, and "receive one snapshot" operations in libzfs_core.
+ * /sbin/zfs and /zbin/zpool will link with both libzfs and + * libzfs_core. Other consumers should aim to use only libzfs_core, + * since that will be the supported, stable interface going forwards. + */ + +#define _IN_LIBZFS_CORE_ + +#include <libzfs_core.h> +#include <ctype.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/nvpair.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/zfs_ioctl.h> +#include "libzfs_core_compat.h" +#include "libzfs_compat.h" + +#ifdef __FreeBSD__ +extern int zfs_ioctl_version; +#endif + +static int g_fd; +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; +static int g_refcount; + +int +libzfs_core_init(void) +{ + (void) pthread_mutex_lock(&g_lock); + if (g_refcount == 0) { + g_fd = open("/dev/zfs", O_RDWR); + if (g_fd < 0) { + (void) pthread_mutex_unlock(&g_lock); + return (errno); + } + } + g_refcount++; + (void) pthread_mutex_unlock(&g_lock); + + return (0); +} + +void +libzfs_core_fini(void) +{ + (void) pthread_mutex_lock(&g_lock); + ASSERT3S(g_refcount, >, 0); + g_refcount--; + if (g_refcount == 0) + (void) close(g_fd); + (void) pthread_mutex_unlock(&g_lock); +} + +static int +lzc_ioctl(zfs_ioc_t ioc, const char *name, + nvlist_t *source, nvlist_t **resultp) +{ + zfs_cmd_t zc = { 0 }; + int error = 0; + char *packed; +#ifdef __FreeBSD__ + nvlist_t *oldsource; +#endif + size_t size; + + ASSERT3S(g_refcount, >, 0); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + +#ifdef __FreeBSD__ + if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) + zfs_ioctl_version = get_zfs_ioctl_version(); + + if (zfs_ioctl_version < ZFS_IOCVER_LZC) { + oldsource = source; + error = lzc_compat_pre(&zc, &ioc, &source); + if (error) + return (error); + } +#endif + + packed = fnvlist_pack(source, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + + if (resultp != NULL) { + 
*resultp = NULL; + zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); +#ifdef illumos + if (zc.zc_nvlist_dst == NULL) { +#else + if (zc.zc_nvlist_dst == 0) { +#endif + error = ENOMEM; + goto out; + } + } + + while (ioctl(g_fd, ioc, &zc) != 0) { + if (errno == ENOMEM && resultp != NULL) { + free((void *)(uintptr_t)zc.zc_nvlist_dst); + zc.zc_nvlist_dst_size *= 2; + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); +#ifdef illumos + if (zc.zc_nvlist_dst == NULL) { +#else + if (zc.zc_nvlist_dst == 0) { +#endif + error = ENOMEM; + goto out; + } + } else { + error = errno; + break; + } + } + +#ifdef __FreeBSD__ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) + lzc_compat_post(&zc, ioc); +#endif + if (zc.zc_nvlist_dst_filled) { + *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size); + } +#ifdef __FreeBSD__ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) + lzc_compat_outnvl(&zc, ioc, resultp); +#endif +out: +#ifdef __FreeBSD__ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) { + if (source != oldsource) + nvlist_free(source); + source = oldsource; + } +#endif + fnvlist_pack_free(packed, size); + free((void *)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} + +int +lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) +{ + int error; + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", type); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +int +lzc_clone(const char *fsname, const char *origin, + nvlist_t *props) +{ + int error; + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +/* + * Creates snapshots. 
+ *
+ * The keys in the snaps nvlist are the snapshots to be created.
+ * They must all be in the same pool.
+ *
+ * The props nvlist is properties to set. Currently only user properties
+ * are supported. { user:prop_name -> string value }
+ *
+ * The returned results nvlist will have an entry for each snapshot that failed.
+ * The value will be the (int32) error code.
+ *
+ * The return value will be 0 if all snapshots were created, otherwise it will
+ * be the errno of a (unspecified) snapshot that failed.
+ *
+ * errlist must not be NULL: *errlist is cleared unconditionally on entry.
+ * An empty snaps list returns 0 without issuing an ioctl.
+ */
+int
+lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
+{
+	nvpair_t *elem;
+	nvlist_t *args;
+	int error;
+	char pool[MAXNAMELEN];
+
+	*errlist = NULL;
+
+	/* determine the pool name */
+	elem = nvlist_next_nvpair(snaps, NULL);
+	if (elem == NULL)
+		return (0);
+	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+	/* cut the first key at its first '/' or '@' to leave the pool */
+	pool[strcspn(pool, "/@")] = '\0';
+
+	args = fnvlist_alloc();
+	fnvlist_add_nvlist(args, "snaps", snaps);
+	if (props != NULL)
+		fnvlist_add_nvlist(args, "props", props);
+
+	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
+	nvlist_free(args);
+
+	return (error);
+}
+
+/*
+ * Destroys snapshots.
+ *
+ * The keys in the snaps nvlist are the snapshots to be destroyed.
+ * They must all be in the same pool.
+ *
+ * Snapshots that do not exist will be silently ignored.
+ *
+ * If 'defer' is not set, and a snapshot has user holds or clones, the
+ * destroy operation will fail and none of the snapshots will be
+ * destroyed.
+ *
+ * If 'defer' is set, and a snapshot has user holds or clones, it will be
+ * marked for deferred destruction, and will be destroyed when the last hold
+ * or clone is removed/destroyed.
+ *
+ * The return value will be 0 if all snapshots were destroyed (or marked for
+ * later destruction if 'defer' is set) or didn't exist to begin with.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) snapshot
+ * that failed, no snapshots will be destroyed, and the errlist will have an
+ * entry for each snapshot that failed. The value in the errlist will be
+ * the (int32) error code.
+ *
+ * Note that *errlist is not written when the snaps list is empty.
+ */
+int
+lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
+{
+	nvpair_t *elem;
+	nvlist_t *args;
+	int error;
+	char pool[MAXNAMELEN];
+
+	/* determine the pool name */
+	elem = nvlist_next_nvpair(snaps, NULL);
+	if (elem == NULL)
+		return (0);
+	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+	pool[strcspn(pool, "/@")] = '\0';
+
+	args = fnvlist_alloc();
+	fnvlist_add_nvlist(args, "snaps", snaps);
+	if (defer)
+		fnvlist_add_boolean(args, "defer");
+
+	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
+	nvlist_free(args);
+
+	return (error);
+}
+
+/*
+ * Query the "used" value the kernel reports for the range of snapshots
+ * from 'firstsnap' to 'lastsnap' and store it in *usedp.
+ *
+ * Returns EINVAL if the first MAXNAMELEN - 1 bytes of 'firstsnap' contain
+ * no '@'; otherwise 0 on success or the errno from the ioctl.  *usedp is
+ * only written on success.
+ */
+int
+lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
+    uint64_t *usedp)
+{
+	nvlist_t *args;
+	nvlist_t *result;
+	int err;
+	char fs[MAXNAMELEN];
+	char *atp;
+
+	/* determine the fs name */
+	/* (fs is only used to validate that firstsnap contains an '@') */
+	(void) strlcpy(fs, firstsnap, sizeof (fs));
+	atp = strchr(fs, '@');
+	if (atp == NULL)
+		return (EINVAL);
+	*atp = '\0';
+
+	args = fnvlist_alloc();
+	fnvlist_add_string(args, "firstsnap", firstsnap);
+
+	/* the ioctl is addressed to lastsnap; firstsnap travels in args */
+	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
+	nvlist_free(args);
+	if (err == 0)
+		*usedp = fnvlist_lookup_uint64(result, "used");
+	fnvlist_free(result);
+
+	return (err);
+}
+
+/*
+ * Returns B_TRUE if the ZFS_IOC_OBJSET_STATS ioctl succeeds for 'dataset',
+ * B_FALSE otherwise (whatever the reason for the failure).
+ */
+boolean_t
+lzc_exists(const char *dataset)
+{
+	/*
+	 * The objset_stats ioctl is still legacy, so we need to construct our
+	 * own zfs_cmd_t rather than using lzc_ioctl().
+	 */
+	zfs_cmd_t zc = { 0 };
+
+	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
+}
+
+/*
+ * Create "user holds" on snapshots. If there is a hold on a snapshot,
+ * the snapshot can not be destroyed. (However, it can be marked for deletion
+ * by lzc_destroy_snaps(defer=B_TRUE).)
+ *
+ * The keys in the nvlist are snapshot names.
+ * The snapshots must all be in the same pool.
+ * The value is the name of the hold (string type).
+ *
+ * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
+ * In this case, when the cleanup_fd is closed (including on process
+ * termination), the holds will be released. If the system is shut down
+ * uncleanly, the holds will be released when the pool is next opened
+ * or imported.
+ *
+ * Holds for snapshots which don't exist will be skipped and have an entry
+ * added to errlist, but will not cause an overall failure.
+ *
+ * The return value will be 0 if all holds, for snapshots that existed,
+ * were successfully created.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed and no holds will be created.
+ *
+ * In all cases the errlist will have an entry for each hold that failed
+ * (name = snapshot), with its value being the error code (int32).
+ *
+ * Note that *errlist is not written when the holds list is empty.
+ */
+int
+lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
+{
+	char pool[MAXNAMELEN];
+	nvlist_t *args;
+	nvpair_t *elem;
+	int error;
+
+	/* determine the pool name */
+	elem = nvlist_next_nvpair(holds, NULL);
+	if (elem == NULL)
+		return (0);
+	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+	pool[strcspn(pool, "/@")] = '\0';
+
+	args = fnvlist_alloc();
+	fnvlist_add_nvlist(args, "holds", holds);
+	/* "cleanup_fd" is only sent when the caller supplied one */
+	if (cleanup_fd != -1)
+		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
+
+	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
+	nvlist_free(args);
+	return (error);
+}
+
+/*
+ * Release "user holds" on snapshots. If the snapshot has been marked for
+ * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
+ * any clones, and all the user holds are removed, then the snapshot will be
+ * destroyed.
+ *
+ * The keys in the nvlist are snapshot names.
+ * The snapshots must all be in the same pool.
+ * The value is a nvlist whose keys are the holds to remove.
+ *
+ * Holds which failed to release because they didn't exist will have an entry
+ * added to errlist, but will not cause an overall failure.
+ *
+ * The return value will be 0 if the nvl holds was empty or all holds that
+ * existed, were successfully removed.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed to release and no holds will be released.
+ *
+ * In all cases the errlist will have an entry for each hold that failed
+ * to release.
+ *
+ * Note that *errlist is not written when the holds list is empty.
+ */
+int
+lzc_release(nvlist_t *holds, nvlist_t **errlist)
+{
+	char pool[MAXNAMELEN];
+	nvpair_t *elem;
+
+	/* determine the pool name */
+	elem = nvlist_next_nvpair(holds, NULL);
+	if (elem == NULL)
+		return (0);
+	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+	pool[strcspn(pool, "/@")] = '\0';
+
+	/* the holds list itself is the ioctl's input nvlist */
+	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
+}
+
+/*
+ * Retrieve list of user holds on the specified snapshot.
+ *
+ * On success, *holdsp will be set to a nvlist which the caller must free.
+ * The keys are the names of the holds, and the value is the creation time
+ * of the hold (uint64) in seconds since the epoch.
+ */
+int
+lzc_get_holds(const char *snapname, nvlist_t **holdsp)
+{
+	int error;
+	/* the ioctl takes no arguments, so an empty list is sent */
+	nvlist_t *innvl = fnvlist_alloc();
+	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
+	fnvlist_free(innvl);
+	return (error);
+}
+
+/*
+ * Generate a zfs send stream for the specified snapshot and write it to
+ * the specified file descriptor.
+ *
+ * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
+ *
+ * If "from" is NULL, a full (non-incremental) stream will be sent.
+ * If "from" is non-NULL, it must be the full name of a snapshot or
+ * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
+ * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
+ * bookmark must represent an earlier point in the history of "snapname".
+ * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
+ * or it can be the origin of "snapname"'s filesystem, or an earlier
+ * snapshot in the origin, etc.
+ *
+ * "fd" is the file descriptor to write the send stream to.
+ *
+ * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
+ * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
+ * records with drr_blksz > 128K.
+ *
+ * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
+ * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
+ * which the receiving system must support (as indicated by support
+ * for the "embedded_data" feature).
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+lzc_send(const char *snapname, const char *from, int fd,
+    enum lzc_send_flags flags)
+{
+	nvlist_t *args;
+	int err;
+
+	args = fnvlist_alloc();
+	fnvlist_add_int32(args, "fd", fd);
+	/* optional arguments are simply omitted when not requested */
+	if (from != NULL)
+		fnvlist_add_string(args, "fromsnap", from);
+	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
+		fnvlist_add_boolean(args, "largeblockok");
+	if (flags & LZC_SEND_FLAG_EMBED_DATA)
+		fnvlist_add_boolean(args, "embedok");
+	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
+	nvlist_free(args);
+	return (err);
+}
+
+/*
+ * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
+ */ +int +lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) +{ + nvlist_t *args; + nvlist_t *result; + int err; + + args = fnvlist_alloc(); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); + nvlist_free(args); + if (err == 0) + *spacep = fnvlist_lookup_uint64(result, "space"); + nvlist_free(result); + return (err); +} + +static int +recv_read(int fd, void *buf, int ilen) +{ + char *cp = buf; + int rv; + int len = ilen; + + do { + rv = read(fd, cp, len); + cp += rv; + len -= rv; + } while (rv > 0); + + if (rv < 0 || len != 0) + return (EIO); + + return (0); +} + +/* + * The simplest receive case: receive from the specified fd, creating the + * specified snapshot. Apply the specified properties a "received" properties + * (which can be overridden by locally-set properties). If the stream is a + * clone, its origin snapshot must be specified by 'origin'. The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). + */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + /* + * The receive ioctl is still legacy, so we need to construct our own + * zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = { 0 }; + char *atp; + char *packed = NULL; + size_t size; + dmu_replay_record_t drr; + int error; + + ASSERT3S(g_refcount, >, 0); + + /* zc_name is name of containing filesystem */ + (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); + atp = strchr(zc.zc_name, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + /* if the fs does not exist, try its parent. 
*/ + if (!lzc_exists(zc.zc_name)) { + char *slashp = strrchr(zc.zc_name, '/'); + if (slashp == NULL) + return (ENOENT); + *slashp = '\0'; + + } + + /* zc_value is full name of the snapshot to create */ + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + + if (props != NULL) { + /* zc_nvlist_src is props to set */ + packed = fnvlist_pack(props, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } + + /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ + if (origin != NULL) + (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); + + /* zc_begin_record is non-byteswapped BEGIN record */ + error = recv_read(fd, &drr, sizeof (drr)); + if (error != 0) + goto out; + zc.zc_begin_record = drr.drr_u.drr_begin; + + /* zc_cookie is fd to read from */ + zc.zc_cookie = fd; + + /* zc guid is force flag */ + zc.zc_guid = force; + + /* zc_cleanup_fd is unused */ + zc.zc_cleanup_fd = -1; + + error = ioctl(g_fd, ZFS_IOC_RECV, &zc); + if (error != 0) + error = errno; + +out: + if (packed != NULL) + fnvlist_pack_free(packed, size); + free((void*)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} + +/* + * Roll back this filesystem or volume to its most recent snapshot. + * If snapnamebuf is not NULL, it will be filled in with the name + * of the most recent snapshot. + * + * Return 0 on success or an errno on failure. + */ +int +lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) +{ + nvlist_t *args; + nvlist_t *result; + int err; + + args = fnvlist_alloc(); + err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); + nvlist_free(args); + if (err == 0 && snapnamebuf != NULL) { + const char *snapname = fnvlist_lookup_string(result, "target"); + (void) strlcpy(snapnamebuf, snapname, snapnamelen); + } + return (err); +} + +/* + * Creates bookmarks. + * + * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to + * the name of the snapshot (e.g. "pool/fs@snap"). 
All the bookmarks and
+ * snapshots must be in the same pool.
+ *
+ * The returned results nvlist will have an entry for each bookmark that failed.
+ * The value will be the (int32) error code.
+ *
+ * The return value will be 0 if all bookmarks were created, otherwise it will
+ * be the errno of a (undetermined) bookmark that failed.
+ *
+ * Note that *errlist is not written when the bookmarks list is empty.
+ */
+int
+lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
+{
+	nvpair_t *elem;
+	int error;
+	char pool[MAXNAMELEN];
+
+	/* determine the pool name */
+	elem = nvlist_next_nvpair(bookmarks, NULL);
+	if (elem == NULL)
+		return (0);
+	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+	/* bookmark names separate the dataset from the mark with '#' */
+	pool[strcspn(pool, "/#")] = '\0';
+
+	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
+
+	return (error);
+}
+
+/*
+ * Retrieve bookmarks.
+ *
+ * Retrieve the list of bookmarks for the given file system. The props
+ * parameter is an nvlist of property names (with no values) that will be
+ * returned for each bookmark.
+ *
+ * The following are valid properties on bookmarks, all of which are numbers
+ * (represented as uint64 in the nvlist)
+ *
+ * "guid" - globally unique identifier of the snapshot it refers to
+ * "createtxg" - txg when the snapshot it refers to was created
+ * "creation" - timestamp when the snapshot it refers to was created
+ *
+ * The format of the returned nvlist is as follows:
+ * <short name of bookmark> -> {
+ *     <name of property> -> {
+ *         "value" -> uint64
+ *     }
+ * }
+ */
+int
+lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
+{
+	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
+}
+
+/*
+ * Destroys bookmarks.
+ *
+ * The keys in the bmarks nvlist are the bookmarks to be destroyed.
+ * They must all be in the same pool. Bookmarks are specified as
+ * <fs>#<bmark>.
+ *
+ * Bookmarks that do not exist will be silently ignored.
+ *
+ * The return value will be 0 if all bookmarks that existed were destroyed.
+ *
+ * Otherwise the return value will be the errno of a (undetermined) bookmark
+ * that failed, no bookmarks will be destroyed, and the errlist will have an
+ * entry for each bookmark that failed. The value in the errlist will be
+ * the (int32) error code.
+ *
+ * Note that *errlist is not written when the bmarks list is empty.
+ */
+int
+lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
+{
+	nvpair_t *elem;
+	int error;
+	char pool[MAXNAMELEN];
+
+	/* determine the pool name */
+	elem = nvlist_next_nvpair(bmarks, NULL);
+	if (elem == NULL)
+		return (0);
+	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
+	pool[strcspn(pool, "/#")] = '\0';
+
+	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
+
+	return (error);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
new file mode 100644
index 0000000..b6a4c12
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+
+#ifndef	_LIBZFS_CORE_H
+#define	_LIBZFS_CORE_H
+
+#include <libnvpair.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/fs/zfs.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* library reference counting; every init must be paired with a fini */
+int libzfs_core_init(void);
+void libzfs_core_fini(void);
+
+/* dataset, snapshot and bookmark management */
+int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
+int lzc_create(const char *, dmu_objset_type_t, nvlist_t *);
+int lzc_clone(const char *, const char *, nvlist_t *);
+int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
+int lzc_bookmark(nvlist_t *, nvlist_t **);
+int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
+int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
+
+int lzc_snaprange_space(const char *, const char *, uint64_t *);
+
+/* user holds on snapshots */
+int lzc_hold(nvlist_t *, int, nvlist_t **);
+int lzc_release(nvlist_t *, nvlist_t **);
+int lzc_get_holds(const char *, nvlist_t **);
+
+/* bit flags accepted by lzc_send(); may be OR-ed together */
+enum lzc_send_flags {
+	LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
+	LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1
+};
+
+/* send / receive */
+int lzc_send(const char *, const char *, int, enum lzc_send_flags);
+int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
+int lzc_send_space(const char *, const char *, uint64_t *);
+
+boolean_t lzc_exists(const char *);
+
+int lzc_rollback(const char *, char *, int);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBZFS_CORE_H */
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c
new file mode 100644
index 0000000..a3b872e
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c
@@ -0,0 +1,189 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
+ */
+
+#include <sys/zfs_ioctl.h>
+#include <zfs_ioctl_compat.h>
+#include "libzfs_core_compat.h"
+
+extern int zfs_ioctl_version;
+
+/*
+ * Split a "<dataset>@<snapshot>" name for an old-style ioctl: the dataset
+ * part goes to zc->zc_name and, if 'setvalue' is set, the snapshot part
+ * goes to zc->zc_value.
+ *
+ * Returns EINVAL if the name contains no '@' and ENAMETOOLONG if the
+ * dataset part does not fit in zc_name; nothing is copied in either case.
+ */
+static int
+lzc_compat_split_snapname(zfs_cmd_t *zc, const char *snapname,
+    boolean_t setvalue)
+{
+	size_t pos = strcspn(snapname, "@");
+
+	/* without an '@' there is nothing after the terminator to read */
+	if (snapname[pos] != '@')
+		return (EINVAL);
+	/* pos + 1 is used as the copy bound, so it must fit the target */
+	if (pos >= sizeof (zc->zc_name))
+		return (ENAMETOOLONG);
+
+	(void) strlcpy(zc->zc_name, snapname, pos + 1);
+	if (setvalue)
+		(void) strlcpy(zc->zc_value, snapname + pos + 1, MAXPATHLEN);
+	return (0);
+}
+
+/*
+ * Translate a new-style (nvlist argument) request into the form expected
+ * by a kernel whose ioctl version predates ZFS_IOCVER_LZC: arguments are
+ * moved from *source into fields of 'zc', and *ioc / *source are replaced
+ * where the old interface differs.  Does nothing on a current kernel.
+ *
+ * Returns 0 on success, EINVAL for a malformed request, ENAMETOOLONG for
+ * an over-long dataset name, and EOPNOTSUPP when the request names more
+ * than one snapshot or hold, which the old interface cannot express.
+ */
+int
+lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source)
+{
+	nvlist_t *nvl = NULL;
+	nvpair_t *pair, *hpair;
+	char *buf, *val;
+	zfs_ioc_t vecnum;
+	uint32_t type32;
+	int32_t cleanup_fd;
+	int error = 0;
+
+	if (zfs_ioctl_version >= ZFS_IOCVER_LZC)
+		return (0);
+
+	vecnum = *ioc;
+
+	switch (vecnum) {
+	case ZFS_IOC_CREATE:
+		type32 = fnvlist_lookup_int32(*source, "type");
+		zc->zc_objset_type = (uint64_t)type32;
+		nvlist_lookup_nvlist(*source, "props", &nvl);
+		*source = nvl;
+		break;
+	case ZFS_IOC_CLONE:
+		buf = fnvlist_lookup_string(*source, "origin");
+		strlcpy(zc->zc_value, buf, MAXPATHLEN);
+		nvlist_lookup_nvlist(*source, "props", &nvl);
+		/* old kernels create clones through the create ioctl */
+		*ioc = ZFS_IOC_CREATE;
+		*source = nvl;
+		break;
+	case ZFS_IOC_SNAPSHOT:
+		nvl = fnvlist_lookup_nvlist(*source, "snaps");
+		pair = nvlist_next_nvpair(nvl, NULL);
+		if (pair != NULL)
+			error = lzc_compat_split_snapname(zc,
+			    nvpair_name(pair), B_TRUE);
+		else
+			error = EINVAL;
+		/* old kernel cannot create multiple snapshots */
+		if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
+			error = EOPNOTSUPP;
+		/*
+		 * NOTE(review): nvl was looked up from *source rather
+		 * than allocated here -- confirm freeing it is intended.
+		 */
+		nvlist_free(nvl);
+		nvl = NULL;
+		nvlist_lookup_nvlist(*source, "props", &nvl);
+		*source = nvl;
+		break;
+	case ZFS_IOC_SPACE_SNAPS:
+		buf = fnvlist_lookup_string(*source, "firstsnap");
+		strlcpy(zc->zc_value, buf, MAXPATHLEN);
+		break;
+	case ZFS_IOC_DESTROY_SNAPS:
+		nvl = fnvlist_lookup_nvlist(*source, "snaps");
+		pair = nvlist_next_nvpair(nvl, NULL);
+		if (pair != NULL)
+			error = lzc_compat_split_snapname(zc,
+			    nvpair_name(pair), B_FALSE);
+		else
+			error = EINVAL;
+		/* old kernel cannot atomically destroy multiple snaps */
+		if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
+			error = EOPNOTSUPP;
+		*source = nvl;
+		break;
+	case ZFS_IOC_HOLD:
+		nvl = fnvlist_lookup_nvlist(*source, "holds");
+		pair = nvlist_next_nvpair(nvl, NULL);
+		if (pair != NULL) {
+			error = lzc_compat_split_snapname(zc,
+			    nvpair_name(pair), B_TRUE);
+			if (!error) {
+				if (nvpair_value_string(pair, &val) == 0)
+					strlcpy(zc->zc_string, val,
+					    MAXNAMELEN);
+				else
+					error = EINVAL;
+			}
+		} else
+			error = EINVAL;
+		/* old kernel cannot atomically create multiple holds */
+		if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
+			error = EOPNOTSUPP;
+		/* NOTE(review): see the SNAPSHOT case about this free */
+		nvlist_free(nvl);
+		if (nvlist_lookup_int32(*source, "cleanup_fd",
+		    &cleanup_fd) == 0)
+			zc->zc_cleanup_fd = cleanup_fd;
+		else
+			zc->zc_cleanup_fd = -1;
+		break;
+	case ZFS_IOC_RELEASE:
+		pair = nvlist_next_nvpair(*source, NULL);
+		if (pair != NULL) {
+			error = lzc_compat_split_snapname(zc,
+			    nvpair_name(pair), B_TRUE);
+			if (!error && nvpair_value_nvlist(pair, &nvl) == 0) {
+				hpair = nvlist_next_nvpair(nvl, NULL);
+				if (hpair != NULL)
+					strlcpy(zc->zc_string,
+					    nvpair_name(hpair), MAXNAMELEN);
+				else
+					error = EINVAL;
+				/* only one hold per snapshot is supported */
+				if (!error && nvlist_next_nvpair(nvl,
+				    hpair) != NULL)
+					error = EOPNOTSUPP;
+			} else if (!error)
+				error = EINVAL;
+		} else
+			error = EINVAL;
+		/*
+		 * old kernel cannot atomically release multiple holds;
+		 * 'pair' belongs to *source, so that is the list that
+		 * has to be checked for a second snapshot.
+		 */
+		if (!error && nvlist_next_nvpair(*source, pair) != NULL)
+			error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Post-process 'zc' after an ioctl issued to a kernel whose ioctl version
+ * predates ZFS_IOCVER_LZC: for the listed ioctls the output buffer is
+ * marked as not filled, so the caller will not try to unpack it.  Does
+ * nothing on a current kernel.
+ */
+void
+lzc_compat_post(zfs_cmd_t *zc, const zfs_ioc_t ioc)
+{
+	if (zfs_ioctl_version >= ZFS_IOCVER_LZC)
+		return;
+
+	switch (ioc) {
+	case ZFS_IOC_CREATE:
+	case ZFS_IOC_CLONE:
+	case ZFS_IOC_SNAPSHOT:
+	case ZFS_IOC_SPACE_SNAPS:
+	case ZFS_IOC_DESTROY_SNAPS:
+		zc->zc_nvlist_dst_filled = B_FALSE;
+		break;
+	}
+}
+
+/*
+ * Build the new-style output nvlist from fields of 'zc' for ioctls whose
+ * old form returns results in the command structure.  Only
+ * ZFS_IOC_SPACE_SNAPS is handled: a freshly allocated list carrying
+ * "used", "compressed" and "uncompressed" is stored in *outnvl.  Does
+ * nothing on a current kernel.
+ *
+ * Always returns 0.
+ */
+int
+lzc_compat_outnvl(zfs_cmd_t *zc, const zfs_ioc_t ioc, nvlist_t **outnvl)
+{
+	nvlist_t *nvl;
+
+	if (zfs_ioctl_version >= ZFS_IOCVER_LZC)
+		return (0);
+
+	switch (ioc) {
+	case ZFS_IOC_SPACE_SNAPS:
+		nvl = fnvlist_alloc();
+		fnvlist_add_uint64(nvl, "used", zc->zc_cookie);
+		fnvlist_add_uint64(nvl, "compressed", zc->zc_objset_type);
+		fnvlist_add_uint64(nvl, "uncompressed", zc->zc_perm_action);
+		*outnvl = nvl;
+		break;
+	}
+
+	return (0);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h
new file mode 100644
index 0000000..6527c4b
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h
@@ -0,0 +1,47 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2013 by Martin Matuska <mm@FreeBSD.org>.
All rights reserved.
+ */
+
+#ifndef	_LIBZFS_CORE_COMPAT_H
+#define	_LIBZFS_CORE_COMPAT_H
+
+#include <libnvpair.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* request translation applied before the ioctl is issued */
+int lzc_compat_pre(zfs_cmd_t *, zfs_ioc_t *, nvlist_t **);
+/* fix-ups applied to the command structure after the ioctl returns */
+void lzc_compat_post(zfs_cmd_t *, const zfs_ioc_t);
+/* construction of the output nvlist from the command structure */
+int lzc_compat_outnvl(zfs_cmd_t *, const zfs_ioc_t, nvlist_t **);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LIBZFS_CORE_COMPAT_H */
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
new file mode 100644
index 0000000..ef38696
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
@@ -0,0 +1,1200 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */ + +#include <assert.h> +#include <fcntl.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <zlib.h> +#include <libgen.h> +#include <sys/spa.h> +#include <sys/stat.h> +#include <sys/processor.h> +#include <sys/zfs_context.h> +#include <sys/rrwlock.h> +#include <sys/zmod.h> +#include <sys/utsname.h> +#include <sys/systeminfo.h> + +/* + * Emulation of kernel services in userland. + */ + +int aok; +uint64_t physmem; +vnode_t *rootdir = (vnode_t *)0xabcd1234; +char hw_serial[HW_HOSTID_LEN]; +#ifdef illumos +kmutex_t cpu_lock; +#endif + +/* If set, all blocks read will be copied to the specified directory. */ +char *vn_dumpdir = NULL; + +struct utsname utsname = { + "userland", "libzpool", "1", "1", "na" +}; + +/* this only exists to have its address taken */ +struct proc p0; + +/* + * ========================================================================= + * threads + * ========================================================================= + */ +/*ARGSUSED*/ +kthread_t * +zk_thread_create(void (*func)(), void *arg) +{ + thread_t tid; + + VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, + &tid) == 0); + + return ((void *)(uintptr_t)tid); +} + +/* + * ========================================================================= + * kstats + * ========================================================================= + */ +/*ARGSUSED*/ +kstat_t * +kstat_create(char *module, int instance, char *name, char *class, + uchar_t type, ulong_t ndata, uchar_t ks_flag) +{ + return (NULL); +} + +/*ARGSUSED*/ +void +kstat_install(kstat_t *ksp) +{} + +/*ARGSUSED*/ +void +kstat_delete(kstat_t *ksp) +{} + +/* + * ========================================================================= + * mutexes + * ========================================================================= + */ +void +zmutex_init(kmutex_t *mp) +{ + mp->m_owner = NULL; + mp->initialized = B_TRUE; + (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); +} 
+ +void +zmutex_destroy(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mp->m_owner == NULL); + (void) _mutex_destroy(&(mp)->m_lock); + mp->m_owner = (void *)-1UL; + mp->initialized = B_FALSE; +} + +int +zmutex_owned(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + + return (mp->m_owner == curthread); +} + +void +mutex_enter(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mp->m_owner != (void *)-1UL); + ASSERT(mp->m_owner != curthread); + VERIFY(mutex_lock(&mp->m_lock) == 0); + ASSERT(mp->m_owner == NULL); + mp->m_owner = curthread; +} + +int +mutex_tryenter(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mp->m_owner != (void *)-1UL); + if (0 == mutex_trylock(&mp->m_lock)) { + ASSERT(mp->m_owner == NULL); + mp->m_owner = curthread; + return (1); + } else { + return (0); + } +} + +void +mutex_exit(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + VERIFY(mutex_unlock(&mp->m_lock) == 0); +} + +void * +mutex_owner(kmutex_t *mp) +{ + ASSERT(mp->initialized == B_TRUE); + return (mp->m_owner); +} + +/* + * ========================================================================= + * rwlocks + * ========================================================================= + */ +/*ARGSUSED*/ +void +rw_init(krwlock_t *rwlp, char *name, int type, void *arg) +{ + rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); + rwlp->rw_owner = NULL; + rwlp->initialized = B_TRUE; + rwlp->rw_count = 0; +} + +void +rw_destroy(krwlock_t *rwlp) +{ + ASSERT(rwlp->rw_count == 0); + rwlock_destroy(&rwlp->rw_lock); + rwlp->rw_owner = (void *)-1UL; + rwlp->initialized = B_FALSE; +} + +void +rw_enter(krwlock_t *rwlp, krw_t rw) +{ + //ASSERT(!RW_LOCK_HELD(rwlp)); + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT(rwlp->rw_owner != curthread); + + if (rw == RW_READER) { + VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); + ASSERT(rwlp->rw_count >= 0); + 
atomic_add_int(&rwlp->rw_count, 1); + } else { + VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); + ASSERT(rwlp->rw_count == 0); + rwlp->rw_count = -1; + rwlp->rw_owner = curthread; + } +} + +void +rw_exit(krwlock_t *rwlp) +{ + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + + if (rwlp->rw_owner == curthread) { + /* Write locked. */ + ASSERT(rwlp->rw_count == -1); + rwlp->rw_count = 0; + rwlp->rw_owner = NULL; + } else { + /* Read locked. */ + ASSERT(rwlp->rw_count > 0); + atomic_add_int(&rwlp->rw_count, -1); + } + VERIFY(rw_unlock(&rwlp->rw_lock) == 0); +} + +int +rw_tryenter(krwlock_t *rwlp, krw_t rw) +{ + int rv; + + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + ASSERT(rwlp->rw_owner != curthread); + + if (rw == RW_READER) + rv = rw_tryrdlock(&rwlp->rw_lock); + else + rv = rw_trywrlock(&rwlp->rw_lock); + + if (rv == 0) { + ASSERT(rwlp->rw_owner == NULL); + if (rw == RW_READER) { + ASSERT(rwlp->rw_count >= 0); + atomic_add_int(&rwlp->rw_count, 1); + } else { + ASSERT(rwlp->rw_count == 0); + rwlp->rw_count = -1; + rwlp->rw_owner = curthread; + } + return (1); + } + + return (0); +} + +/*ARGSUSED*/ +int +rw_tryupgrade(krwlock_t *rwlp) +{ + ASSERT(rwlp->initialized == B_TRUE); + ASSERT(rwlp->rw_owner != (void *)-1UL); + + return (0); +} + +int +rw_lock_held(krwlock_t *rwlp) +{ + + return (rwlp->rw_count != 0); +} + +/* + * ========================================================================= + * condition variables + * ========================================================================= + */ +/*ARGSUSED*/ +void +cv_init(kcondvar_t *cv, char *name, int type, void *arg) +{ + VERIFY(cond_init(cv, name, NULL) == 0); +} + +void +cv_destroy(kcondvar_t *cv) +{ + VERIFY(cond_destroy(cv) == 0); +} + +void +cv_wait(kcondvar_t *cv, kmutex_t *mp) +{ + ASSERT(mutex_owner(mp) == curthread); + mp->m_owner = NULL; + int ret = cond_wait(cv, &mp->m_lock); + VERIFY(ret == 0 || ret == EINTR); + mp->m_owner = curthread; 
+}
+
+/*
+ * Wait on cv, with mp held, for at most `abstime' clock ticks.
+ *
+ * The argument is added to the current ddi_get_lbolt() value on entry, so in
+ * this implementation it acts as a tick count relative to now.  The ticks
+ * still remaining are turned into an absolute gettimeofday()-based deadline
+ * for pthread_cond_timedwait().  The caller must own mp; its owner field is
+ * cleared for the duration of the wait and restored afterwards.
+ *
+ * Returns 1 if the wait ended without timing out and -1 once the deadline
+ * has passed.  EINTR restarts the wait with whatever time is left.
+ */
+clock_t
+cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
+{
+	int error;
+	struct timespec ts;
+	struct timeval tv;
+	clock_t delta;
+
+	abstime += ddi_get_lbolt();
+top:
+	delta = abstime - ddi_get_lbolt();
+	if (delta <= 0)
+		return (-1);
+
+	if (gettimeofday(&tv, NULL) != 0)
+		assert(!"gettimeofday() failed");
+
+	ts.tv_sec = tv.tv_sec + delta / hz;
+	ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
+	ASSERT(ts.tv_nsec >= 0);
+
+	if (ts.tv_nsec >= NANOSEC) {
+		ts.tv_sec++;
+		ts.tv_nsec -= NANOSEC;
+	}
+
+	ASSERT(mutex_owner(mp) == curthread);
+	mp->m_owner = NULL;
+	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
+	mp->m_owner = curthread;
+
+	if (error == EINTR)
+		goto top;
+
+	if (error == ETIMEDOUT)
+		return (-1);
+
+	ASSERT(error == 0);
+
+	return (1);
+}
+
+/*
+ * High-resolution timed wait: block on cv, with mp held, until it is
+ * signalled or until gethrtime() reaches `tim' (nanoseconds).  `res' is
+ * unused and `flag' must be 0.
+ *
+ * pthread_cond_timedwait() expects an absolute deadline, so the interval
+ * still remaining is added to the current gettimeofday() time, exactly as
+ * cv_timedwait() builds its deadline.  Handing over the bare interval would
+ * name a moment just after the epoch and make every wait time out at once.
+ *
+ * Returns 1 if the wait ended without timing out and -1 once `tim' has
+ * passed.  EINTR restarts the wait with whatever time is left.
+ */
+/*ARGSUSED*/
+clock_t
+cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+    int flag)
+{
+	int error;
+	timestruc_t ts;
+	struct timeval tv;
+	hrtime_t delta;
+
+	ASSERT(flag == 0);
+
+top:
+	delta = tim - gethrtime();
+	if (delta <= 0)
+		return (-1);
+
+	if (gettimeofday(&tv, NULL) != 0)
+		assert(!"gettimeofday() failed");
+
+	/* Both nanosecond terms are below NANOSEC, so one carry is enough. */
+	ts.tv_sec = tv.tv_sec + delta / NANOSEC;
+	ts.tv_nsec = tv.tv_usec * 1000 + delta % NANOSEC;
+	if (ts.tv_nsec >= NANOSEC) {
+		ts.tv_sec++;
+		ts.tv_nsec -= NANOSEC;
+	}
+
+	ASSERT(mutex_owner(mp) == curthread);
+	mp->m_owner = NULL;
+	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
+	mp->m_owner = curthread;
+
+	if (error == ETIMEDOUT)
+		return (-1);
+
+	if (error == EINTR)
+		goto top;
+
+	ASSERT(error == 0);
+
+	return (1);
+}
+
+void
+cv_signal(kcondvar_t *cv)
+{
+	VERIFY(cond_signal(cv) == 0);
+}
+
+void
+cv_broadcast(kcondvar_t *cv)
+{
+	VERIFY(cond_broadcast(cv) == 0);
+}
+
+/*
+ * =========================================================================
+ * vnode operations
+ * =========================================================================
+ */
+/*
+ * Note: for the xxxat() versions of these functions, we assume that the
+ * starting vp is always rootdir (which is true for spa_directory.c, the only
+ * ZFS consumer of these interfaces).
We assert this is true, and then emulate
+ * them by adding '/' in front of the path.
+ */
+
+/*
+ * Open `path' and hand back a userland vnode for it in *vpp.
+ *
+ * Paths under /dev/ are first opened as given and fstat'ed, then reopened
+ * with a "/dsk/" component rewritten to "/rdsk/" (see the comment in the
+ * body).  When vn_dumpdir is set, a dump file named after the basename of
+ * the opened path is opened in that directory and its descriptor is stored
+ * in the vnode.
+ *
+ * Returns 0 on success or an errno value on failure; ENAMETOOLONG is
+ * returned when the path does not fit the MAXPATHLEN-sized local buffer.
+ * errno is captured right after the failing call, because close() and the
+ * calls that follow may overwrite it, and no descriptor opened here is left
+ * open on a failure return.  x1, x2 and x3 are ignored.
+ */
+/*ARGSUSED*/
+int
+vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
+{
+	int fd;
+	int dump_fd;
+	int err;
+	vnode_t *vp;
+	int old_umask;
+	char realpath[MAXPATHLEN];
+	struct stat64 st;
+
+	/*
+	 * If we're accessing a real disk from userland, we need to use
+	 * the character interface to avoid caching.  This is particularly
+	 * important if we're trying to look at a real in-kernel storage
+	 * pool from userland, e.g. via zdb, because otherwise we won't
+	 * see the changes occurring under the segmap cache.
+	 * On the other hand, the stupid character device returns zero
+	 * for its size.  So -- gag -- we open the block device to get
+	 * its size, and remember it for subsequent VOP_GETATTR().
+	 */
+	if (strncmp(path, "/dev/", 5) == 0) {
+		char *dsk;
+		fd = open64(path, O_RDONLY);
+		if (fd == -1)
+			return (errno);
+		if (fstat64(fd, &st) == -1) {
+			err = errno;
+			close(fd);
+			return (err);
+		}
+		close(fd);
+		if (snprintf(realpath, sizeof (realpath), "%s", path) >=
+		    (int)sizeof (realpath))
+			return (ENAMETOOLONG);
+		dsk = strstr(path, "/dsk/");
+		if (dsk != NULL) {
+			/* Inserting the 'r' makes the path one byte longer. */
+			size_t off = (dsk - path) + 1;
+
+			if (snprintf(realpath + off, sizeof (realpath) - off,
+			    "r%s", dsk + 1) >=
+			    (int)(sizeof (realpath) - off))
+				return (ENAMETOOLONG);
+		}
+	} else {
+		if (snprintf(realpath, sizeof (realpath), "%s", path) >=
+		    (int)sizeof (realpath))
+			return (ENAMETOOLONG);
+		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
+			return (errno);
+	}
+
+	if (flags & FCREAT)
+		old_umask = umask(0);
+
+	/*
+	 * The construct 'flags - FREAD' conveniently maps combinations of
+	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
+	 */
+	fd = open64(realpath, flags - FREAD, mode);
+	err = errno;
+
+	if (flags & FCREAT)
+		(void) umask(old_umask);
+
+	/* Fail before the dump file is opened so nothing is left behind. */
+	if (fd == -1)
+		return (err);
+
+	if (vn_dumpdir != NULL) {
+		char dumppath[MAXPATHLEN];
+		(void) snprintf(dumppath, sizeof (dumppath),
+		    "%s/%s", vn_dumpdir, basename(realpath));
+		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
+		if (dump_fd == -1) {
+			err = errno;
+			close(fd);
+			return (err);
+		}
+	} else {
+		dump_fd = -1;
+	}
+
+	if (fstat64(fd, &st) == -1) {
+		err = errno;
+		close(fd);
+		if (dump_fd != -1)
+			close(dump_fd);
+		return (err);
+	}
+
+	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
+
+	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
+
+	vp->v_fd = fd;
+	vp->v_size = st.st_size;
+	vp->v_path = spa_strdup(path);
+	vp->v_dump_fd = dump_fd;
+
+	return (0);
+}
+
+/*
+ * Emulate an open relative to rootdir: '/' is put in front of `path' and the
+ * result is passed to vn_open().  startvp must be rootdir; fd is ignored.
+ * Returns whatever vn_open() returns.
+ */
+/*ARGSUSED*/
+int
+vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
+    int x3, vnode_t *startvp, int fd)
+{
+	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
+	int ret;
+
+	ASSERT(startvp == rootdir);
+	(void) sprintf(realpath, "/%s", path);
+
+	/* fd ignored for now, need if want to simulate nbmand support */
+	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
+
+	umem_free(realpath, strlen(path) + 2);
+
+	return (ret);
+}
+
+/*ARGSUSED*/
+int
+vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
+    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
+{
+	ssize_t iolen, split;
+
+	if (uio == UIO_READ) {
+		iolen = pread64(vp->v_fd, addr, len, offset);
+		/*
+		 * Only mirror data that was really read: after a failed
+		 * pread64() iolen is -1, which must not be used as a length,
+		 * and errno has to survive until it is returned below.
+		 */
+		if (vp->v_dump_fd != -1 && iolen > 0) {
+			int status =
+			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
+			ASSERT(status != -1);
+		}
+	} else {
+		/*
+		 * To simulate partial disk writes, we split writes into two
+		 * system calls so that the process can be killed in between.
+		 */
+		int sectors = len >> SPA_MINBLOCKSHIFT;
+		split = (sectors > 0 ?
rand() % sectors : 0) <<
+		    SPA_MINBLOCKSHIFT;
+		iolen = pwrite64(vp->v_fd, addr, split, offset);
+		iolen += pwrite64(vp->v_fd, (char *)addr + split,
+		    len - split, offset + split);
+	}
+
+	if (iolen == -1)
+		return (errno);
+	if (residp)
+		*residp = len - iolen;
+	else if (iolen != len)
+		return (EIO);
+	return (0);
+}
+
+/*
+ * Release a vnode obtained from vn_open(): closes its descriptor and, when
+ * present, its dump descriptor, then frees the path copy and the vnode.
+ * openflag, cr and td are not used.
+ */
+void
+vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
+{
+	close(vp->v_fd);
+	if (vp->v_dump_fd != -1)
+		close(vp->v_dump_fd);
+	spa_strfree(vp->v_path);
+	umem_free(vp, sizeof (vnode_t));
+}
+
+/*
+ * At a minimum we need to update the size since vdev_reopen()
+ * will no longer call vn_openat().
+ *
+ * Stores the current size of the file behind vp in vap->va_size.  Returns 0
+ * on success, or the errno reported by fstat64() on failure.
+ */
+int
+fop_getattr(vnode_t *vp, vattr_t *vap)
+{
+	struct stat64 st;
+	int err;
+
+	if (fstat64(vp->v_fd, &st) == -1) {
+		/* close() may overwrite errno, so capture it first. */
+		err = errno;
+		/*
+		 * NOTE(review): the descriptor is closed here but vp is left
+		 * intact, so a later vn_close(vp) closes the same descriptor
+		 * number a second time -- confirm this is what callers expect.
+		 */
+		close(vp->v_fd);
+		return (err);
+	}
+
+	vap->va_size = st.st_size;
+	return (0);
+}
+
+#ifdef ZFS_DEBUG
+
+/*
+ * =========================================================================
+ * Figure out which debugging statements to print
+ * =========================================================================
+ */
+
+static char *dprintf_string;
+static int dprintf_print_all;
+
+/*
+ * Return 1 if `string' is one of the comma-separated entries of
+ * dprintf_string (a whole entry must match, not just a prefix), 0 otherwise,
+ * including when dprintf_string is NULL.
+ */
+int
+dprintf_find_string(const char *string)
+{
+	char *tmp_str = dprintf_string;
+	int len = strlen(string);
+
+	/*
+	 * Find out if this is a string we want to print.
+	 * String format: file1.c,function_name1,file2.c,file3.c
+	 */
+
+	while (tmp_str != NULL) {
+		if (strncmp(tmp_str, string, len) == 0 &&
+		    (tmp_str[len] == ',' || tmp_str[len] == '\0'))
+			return (1);
+		tmp_str = strchr(tmp_str, ',');
+		if (tmp_str != NULL)
+			tmp_str++; /* Get rid of , */
+	}
+	return (0);
+}
+
+void
+dprintf_setup(int *argc, char **argv)
+{
+	int i, j;
+
+	/*
+	 * Debugging can be specified two ways: by setting the
+	 * environment variable ZFS_DEBUG, or by including a
+	 * "debug=..." argument on the command line.  The command
+	 * line setting overrides the environment variable.
+ */ + + for (i = 1; i < *argc; i++) { + int len = strlen("debug="); + /* First look for a command line argument */ + if (strncmp("debug=", argv[i], len) == 0) { + dprintf_string = argv[i] + len; + /* Remove from args */ + for (j = i; j < *argc; j++) + argv[j] = argv[j+1]; + argv[j] = NULL; + (*argc)--; + } + } + + if (dprintf_string == NULL) { + /* Look for ZFS_DEBUG environment variable */ + dprintf_string = getenv("ZFS_DEBUG"); + } + + /* + * Are we just turning on all debugging? + */ + if (dprintf_find_string("on")) + dprintf_print_all = 1; + + if (dprintf_string != NULL) + zfs_flags |= ZFS_DEBUG_DPRINTF; +} + +int +sysctl_handle_64(SYSCTL_HANDLER_ARGS) +{ + return (0); +} + +/* + * ========================================================================= + * debug printfs + * ========================================================================= + */ +void +__dprintf(const char *file, const char *func, int line, const char *fmt, ...) +{ + const char *newfile; + va_list adx; + + /* + * Get rid of annoying "../common/" prefix to filename. 
+	 */
+	newfile = strrchr(file, '/');
+	if (newfile != NULL) {
+		newfile = newfile + 1; /* Get rid of leading / */
+	} else {
+		newfile = file;
+	}
+
+	if (dprintf_print_all ||
+	    dprintf_find_string(newfile) ||
+	    dprintf_find_string(func)) {
+		/* Print out just the function name if requested */
+		flockfile(stdout);
+		if (dprintf_find_string("pid"))
+			(void) printf("%d ", getpid());
+		if (dprintf_find_string("tid"))
+			(void) printf("%lu ", thr_self());
+#if 0
+		if (dprintf_find_string("cpu"))
+			(void) printf("%u ", getcpuid());
+#endif
+		if (dprintf_find_string("time"))
+			(void) printf("%llu ", gethrtime());
+		if (dprintf_find_string("long"))
+			(void) printf("%s, line %d: ", newfile, line);
+		(void) printf("%s: ", func);
+		va_start(adx, fmt);
+		(void) vprintf(fmt, adx);
+		va_end(adx);
+		funlockfile(stdout);
+	}
+}
+
+#endif /* ZFS_DEBUG */
+
+/*
+ * =========================================================================
+ * cmn_err() and panic()
+ * =========================================================================
+ */
+static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
+static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
+
+/*
+ * Print "error: ", the formatted message and a newline on stderr, then
+ * abort().  Does not return.
+ */
+void
+vpanic(const char *fmt, va_list adx)
+{
+	(void) fprintf(stderr, "error: ");
+	(void) vfprintf(stderr, fmt, adx);
+	(void) fprintf(stderr, "\n");
+
+	abort();	/* think of it as a "user-level crash dump" */
+}
+
+/* Variadic front end to vpanic(). */
+void
+panic(const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vpanic(fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * Userland cmn_err(): write the message to stderr, framed by the prefix and
+ * suffix that belong to severity `ce'.
+ *
+ * CE_PANIC is handed to vpanic(), which aborts.  CE_NOTE messages are
+ * dropped to keep stress-test output quiet, and CE_IGNORE prints nothing.
+ */
+void
+vcmn_err(int ce, const char *fmt, va_list adx)
+{
+	if (ce == CE_PANIC)
+		vpanic(fmt, adx);
+	/*
+	 * CE_IGNORE means "print nothing"; it is also one past the end of
+	 * ce_prefix[] and ce_suffix[], so it must never be used as an index.
+	 */
+	if (ce == CE_IGNORE)
+		return;
+	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
+		(void) fprintf(stderr, "%s", ce_prefix[ce]);
+		(void) vfprintf(stderr, fmt, adx);
+		(void) fprintf(stderr, "%s", ce_suffix[ce]);
+	}
+}
+
+/*PRINTFLIKE2*/
+void
+cmn_err(int ce, const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vcmn_err(ce, fmt, adx);
+	va_end(adx);
+}
+
+/*
+ * =========================================================================
+ * kobj interfaces
+ * =========================================================================
+ */
+/*
+ * Open `name' relative to rootdir for reading.  Returns a newly allocated
+ * struct _buf whose _fd field holds the vnode pointer, or (void *)-1UL if
+ * the open fails.
+ */
+struct _buf *
+kobj_open_file(char *name)
+{
+	struct _buf *file;
+	vnode_t *vp;
+
+	/* set vp as the _fd field of the file */
+	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
+	    -1) != 0)
+		return ((void *)-1UL);
+
+	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
+	file->_fd = (intptr_t)vp;
+	return (file);
+}
+
+/*
+ * Read up to `size' bytes at offset `off' of `file' into `buf'.
+ *
+ * Returns the number of bytes read, or -1 if the read fails.  vn_rdwr()
+ * leaves the residual count untouched when it reports an error, so its
+ * return value has to be checked before the count is used.
+ */
+int
+kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
+{
+	ssize_t resid = 0;
+
+	if (vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
+	    UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
+		return (-1);
+
+	return (size - resid);
+}
+
+/* Close the vnode behind `file' and free the struct _buf itself. */
+void
+kobj_close_file(struct _buf *file)
+{
+	vn_close((vnode_t *)file->_fd, 0, NULL, NULL);
+	umem_free(file, sizeof (struct _buf));
+}
+
+/*
+ * Store the current size of `file' in *size.  Returns 0 on success, or the
+ * errno reported by fstat64() on failure.
+ */
+int
+kobj_get_filesize(struct _buf *file, uint64_t *size)
+{
+	struct stat64 st;
+	vnode_t *vp = (vnode_t *)file->_fd;
+
+	if (fstat64(vp->v_fd, &st) == -1) {
+		/* vn_close() may overwrite errno, so capture it first. */
+		int err = errno;
+
+		/*
+		 * NOTE(review): this frees vp while file->_fd still refers
+		 * to it, so a later kobj_close_file(file) would act on a
+		 * freed vnode -- confirm how callers treat this error.
+		 */
+		vn_close(vp, 0, NULL, NULL);
+		return (err);
+	}
+	*size = st.st_size;
+	return (0);
+}
+
+/*
+ * =========================================================================
+ * misc routines
+ * =========================================================================
+ */
+
+/* Sleep for `ticks' clock ticks by calling poll() with no descriptors. */
+void
+delay(clock_t ticks)
+{
+	poll(0, 0, ticks * (1000 / hz));
+}
+
+#if 0
+/*
+ * Find highest one bit set.
+ * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
+ */
+int
+highbit64(uint64_t i)
+{
+	int h = 1;
+
+	if (i == 0)
+		return (0);
+	if (i & 0xffffffff00000000ULL) {
+		h += 32; i >>= 32;
+	}
+	if (i & 0xffff0000) {
+		h += 16; i >>= 16;
+	}
+	if (i & 0xff00) {
+		h += 8; i >>= 8;
+	}
+	if (i & 0xf0) {
+		h += 4; i >>= 4;
+	}
+	if (i & 0xc) {
+		h += 2; i >>= 2;
+	}
+	if (i & 0x2) {
+		h += 1;
+	}
+	return (h);
+}
+#endif
+
+static int random_fd = -1, urandom_fd = -1;
+
+/*
+ * Fill ptr[0..len) with bytes read from the descriptor fd.
+ *
+ * Short reads are continued and reads interrupted by a signal are retried.
+ * Any other failure, or an unexpected end of file, aborts through VERIFY()
+ * rather than spinning forever or walking the pointer backwards.
+ *
+ * Always returns 0.
+ */
+static int
+random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
+{
+	size_t resid = len;
+	ssize_t bytes;
+
+	ASSERT(fd != -1);
+
+	while (resid != 0) {
+		bytes = read(fd, ptr, resid);
+		if (bytes == -1 && errno == EINTR)
+			continue;
+		VERIFY(bytes > 0);
+		ptr += bytes;
+		resid -= bytes;
+	}
+
+	return (0);
+}
+
+/* Fill ptr[0..len) from the descriptor kept in random_fd. */
+int
+random_get_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, random_fd));
+}
+
+/* Fill ptr[0..len) from the descriptor kept in urandom_fd. */
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+	return (random_get_bytes_common(ptr, len, urandom_fd));
+}
+
+/*
+ * Convert the string hw_serial to an unsigned long in the given base.
+ *
+ * The value is stored in *result and, when nptr is not NULL, the address of
+ * the first unconverted character is stored in *nptr.  Returns 0 on success
+ * or the errno set by strtoul() (for example ERANGE on overflow).  errno is
+ * cleared before the call so that a stale value is never mistaken for a
+ * failure when the converted number happens to be 0.
+ */
+int
+ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
+{
+	char *end;
+
+	errno = 0;
+	*result = strtoul(hw_serial, &end, base);
+	if (nptr != NULL)
+		*nptr = end;
+	return (errno);
+}
+
+/*
+ * Convert the string str to an unsigned long long in the given base.
+ *
+ * Same contract as ddi_strtoul(): *result receives the value, *nptr (when
+ * nptr is not NULL) the end of the converted text, and the return value is
+ * 0 or the errno set by strtoull().
+ */
+int
+ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
+{
+	char *end;
+
+	errno = 0;
+	*result = strtoull(str, &end, base);
+	if (nptr != NULL)
+		*nptr = end;
+	return (errno);
+}
+
+#ifdef illumos
+/* ARGSUSED */
+cyclic_id_t
+cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
+{
+	return (1);
+}
+
+/* ARGSUSED */
+void
+cyclic_remove(cyclic_id_t id)
+{
+}
+
+/* ARGSUSED */
+int
+cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
+{
+	return (1);
+}
+#endif
+
+/*
+ * =========================================================================
+ * kernel emulation setup & teardown
+ * =========================================================================
+ */
+static int
+umem_out_of_memory(void)
+{
+	char errmsg[] = "out of memory -- generating core dump\n";
+
+	write(fileno(stderr), errmsg, sizeof (errmsg));
+	abort();
+	return (0);
+} + +void +kernel_init(int mode) +{ + extern uint_t rrw_tsd_key; + + umem_nofail_callback(umem_out_of_memory); + + physmem = sysconf(_SC_PHYS_PAGES); + + dprintf("physmem = %llu pages (%.2f GB)\n", physmem, + (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); + + (void) snprintf(hw_serial, sizeof (hw_serial), "%lu", + (mode & FWRITE) ? (unsigned long)gethostid() : 0); + + VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); + VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); + + system_taskq_init(); + +#ifdef illumos + mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL); +#endif + + spa_init(mode); + + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); +} + +void +kernel_fini(void) +{ + spa_fini(); + + system_taskq_fini(); + + close(random_fd); + close(urandom_fd); + + random_fd = -1; + urandom_fd = -1; +} + +int +z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) +{ + int ret; + uLongf len = *dstlen; + + if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) + *dstlen = (size_t)len; + + return (ret); +} + +int +z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, + int level) +{ + int ret; + uLongf len = *dstlen; + + if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) + *dstlen = (size_t)len; + + return (ret); +} + +uid_t +crgetuid(cred_t *cr) +{ + return (0); +} + +uid_t +crgetruid(cred_t *cr) +{ + return (0); +} + +gid_t +crgetgid(cred_t *cr) +{ + return (0); +} + +int +crgetngroups(cred_t *cr) +{ + return (0); +} + +gid_t * +crgetgroups(cred_t *cr) +{ + return (NULL); +} + +int +zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) +{ + return (0); +} + +int +zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) +{ + return (0); +} + +int +zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) +{ + return (0); +} + +ksiddomain_t * +ksid_lookupdomain(const char *dom) +{ + ksiddomain_t *kd; + + kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); + kd->kd_name = 
spa_strdup(dom); + return (kd); +} + +void +ksiddomain_rele(ksiddomain_t *ksid) +{ + spa_strfree(ksid->kd_name); + umem_free(ksid, sizeof (ksiddomain_t)); +} + +/* + * Do not change the length of the returned string; it must be freed + * with strfree(). + */ +char * +kmem_asprintf(const char *fmt, ...) +{ + int size; + va_list adx; + char *buf; + + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx) + 1; + va_end(adx); + + buf = kmem_alloc(size, KM_SLEEP); + + va_start(adx, fmt); + size = vsnprintf(buf, size, fmt, adx); + va_end(adx); + + return (buf); +} + +/* ARGSUSED */ +int +zfs_onexit_fd_hold(int fd, minor_t *minorp) +{ + *minorp = 0; + return (0); +} + +/* ARGSUSED */ +void +zfs_onexit_fd_rele(int fd) +{ +} + +/* ARGSUSED */ +int +zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, + uint64_t *action_handle) +{ + return (0); +} + +/* ARGSUSED */ +int +zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) +{ + return (0); +} + +/* ARGSUSED */ +int +zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) +{ + return (0); +} + +#ifdef __FreeBSD__ +/* ARGSUSED */ +int +zvol_create_minors(const char *name) +{ + return (0); +} +#endif + +#ifdef illumos +void +bioinit(buf_t *bp) +{ + bzero(bp, sizeof (buf_t)); +} + +void +biodone(buf_t *bp) +{ + if (bp->b_iodone != NULL) { + (*(bp->b_iodone))(bp); + return; + } + ASSERT((bp->b_flags & B_DONE) == 0); + bp->b_flags |= B_DONE; +} + +void +bioerror(buf_t *bp, int error) +{ + ASSERT(bp != NULL); + ASSERT(error >= 0); + + if (error != 0) { + bp->b_flags |= B_ERROR; + } else { + bp->b_flags &= ~B_ERROR; + } + bp->b_error = error; +} + + +int +geterror(struct buf *bp) +{ + int error = 0; + + if (bp->b_flags & B_ERROR) { + error = bp->b_error; + if (!error) + error = EIO; + } + return (error); +} +#endif diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h new file mode 100644 index 
0000000..03027c3 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h @@ -0,0 +1,819 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_ZFS_CONTEXT_H +#define _SYS_ZFS_CONTEXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define _SYS_MUTEX_H +#define _SYS_RWLOCK_H +#define _SYS_CONDVAR_H +#define _SYS_SYSTM_H +#define _SYS_T_LOCK_H +#define _SYS_VNODE_H +#define _SYS_VFS_H +#define _SYS_SUNDDI_H +#define _SYS_CALLB_H +#define _SYS_SCHED_H_ + +#include <solaris.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdarg.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include <thread.h> +#include <assert.h> +#include <limits.h> +#include <dirent.h> +#include <time.h> +#include <math.h> +#include <umem.h> +#include <inttypes.h> +#include <fsshare.h> +#include <pthread.h> +#include <sched.h> +#include <sys/debug.h> +#include <sys/note.h> +#include <sys/types.h> +#include <sys/cred.h> +#include <sys/atomic.h> +#include <sys/sysmacros.h> +#include <sys/bitmap.h> +#include <sys/resource.h> +#include <sys/byteorder.h> +#include <sys/list.h> +#include <sys/time.h> +#include <sys/uio.h> +#include <sys/mntent.h> +#include <sys/mnttab.h> +#include <sys/zfs_debug.h> +#include <sys/sdt.h> +#include <sys/kstat.h> +#include <sys/u8_textprep.h> +#include <sys/kernel.h> +#include <sys/disk.h> +#include <sys/sysevent.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/sysevent/dev.h> +#include <machine/atomic.h> +#include <sys/debug.h> +#ifdef illumos +#include "zfs.h" +#endif + +#define ZFS_EXPORTS_PATH "/etc/zfs/exports" + +/* + * Debugging + */ + +/* + * Note that we are not using the debugging levels. + */ + +#define CE_CONT 0 /* continuation */ +#define CE_NOTE 1 /* notice */ +#define CE_WARN 2 /* warning */ +#define CE_PANIC 3 /* panic */ +#define CE_IGNORE 4 /* print nothing */ + +/* + * ZFS debugging + */ + +#define ZFS_LOG(...) 
do { } while (0) + +typedef u_longlong_t rlim64_t; +#define RLIM64_INFINITY ((rlim64_t)-3) + +#ifdef ZFS_DEBUG +extern void dprintf_setup(int *argc, char **argv); +#endif /* ZFS_DEBUG */ + +extern void cmn_err(int, const char *, ...); +extern void vcmn_err(int, const char *, __va_list); +extern void panic(const char *, ...); +extern void vpanic(const char *, __va_list); + +#define fm_panic panic + +extern int aok; + +/* + * DTrace SDT probes have different signatures in userland than they do in + * kernel. If they're being used in kernel code, re-define them out of + * existence for their counterparts in libzpool. + */ + +#ifdef DTRACE_PROBE +#undef DTRACE_PROBE +#endif /* DTRACE_PROBE */ +#ifdef illumos +#define DTRACE_PROBE(a) \ + ZFS_PROBE0(#a) +#endif + +#ifdef DTRACE_PROBE1 +#undef DTRACE_PROBE1 +#endif /* DTRACE_PROBE1 */ +#ifdef illumos +#define DTRACE_PROBE1(a, b, c) \ + ZFS_PROBE1(#a, (unsigned long)c) +#endif + +#ifdef DTRACE_PROBE2 +#undef DTRACE_PROBE2 +#endif /* DTRACE_PROBE2 */ +#ifdef illumos +#define DTRACE_PROBE2(a, b, c, d, e) \ + ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e) +#endif + +#ifdef DTRACE_PROBE3 +#undef DTRACE_PROBE3 +#endif /* DTRACE_PROBE3 */ +#ifdef illumos +#define DTRACE_PROBE3(a, b, c, d, e, f, g) \ + ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g) +#endif + +#ifdef DTRACE_PROBE4 +#undef DTRACE_PROBE4 +#endif /* DTRACE_PROBE4 */ +#ifdef illumos +#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \ + ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \ + (unsigned long)i) +#endif + +#ifdef illumos +/* + * We use the comma operator so that this macro can be used without much + * additional code. For example, "return (EINVAL);" becomes + * "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated + * twice, so it should not have side effects (e.g. something like: + * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice). 
+ */ +#define SET_ERROR(err) (ZFS_SET_ERROR(err), err) +#else /* !illumos */ + +#define DTRACE_PROBE(a) ((void)0) +#define DTRACE_PROBE1(a, b, c) ((void)0) +#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) +#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) +#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) + +#define SET_ERROR(err) (err) +#endif /* !illumos */ + +/* + * Threads + */ +#define curthread ((void *)(uintptr_t)thr_self()) + +#define kpreempt(x) sched_yield() + +typedef struct kthread kthread_t; + +#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ + zk_thread_create(func, arg) +#define thread_exit() thr_exit(NULL) +#define thread_join(t) panic("libzpool cannot join threads") + +#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) + +/* in libzpool, p0 exists only to have its address taken */ +struct proc { + uintptr_t this_is_never_used_dont_dereference_it; +}; + +extern struct proc p0; +#define curproc (&p0) + +#define PS_NONE -1 + +extern kthread_t *zk_thread_create(void (*func)(), void *arg); + +#define issig(why) (FALSE) +#define ISSIG(thr, why) (FALSE) + +/* + * Mutexes + */ +typedef struct kmutex { + void *m_owner; + boolean_t initialized; + mutex_t m_lock; +} kmutex_t; + +#define MUTEX_DEFAULT USYNC_THREAD +#undef MUTEX_HELD +#undef MUTEX_NOT_HELD +#define MUTEX_HELD(m) ((m)->m_owner == curthread) +#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m)) +#define _mutex_held(m) pthread_mutex_isowned_np(m) + +/* + * Argh -- we have to get cheesy here because the kernel and userland + * have different signatures for the same routine. 
+ */ +//extern int _mutex_init(mutex_t *mp, int type, void *arg); +//extern int _mutex_destroy(mutex_t *mp); +//extern int _mutex_owned(mutex_t *mp); + +#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp)) +#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp)) +#define mutex_owned(mp) zmutex_owned((kmutex_t *)(mp)) + +extern void zmutex_init(kmutex_t *mp); +extern void zmutex_destroy(kmutex_t *mp); +extern int zmutex_owned(kmutex_t *mp); +extern void mutex_enter(kmutex_t *mp); +extern void mutex_exit(kmutex_t *mp); +extern int mutex_tryenter(kmutex_t *mp); +extern void *mutex_owner(kmutex_t *mp); + +/* + * RW locks + */ +typedef struct krwlock { + int rw_count; + void *rw_owner; + boolean_t initialized; + rwlock_t rw_lock; +} krwlock_t; + +typedef int krw_t; + +#define RW_READER 0 +#define RW_WRITER 1 +#define RW_DEFAULT USYNC_THREAD + +#undef RW_READ_HELD +#define RW_READ_HELD(x) ((x)->rw_owner == NULL && (x)->rw_count > 0) + +#undef RW_WRITE_HELD +#define RW_WRITE_HELD(x) ((x)->rw_owner == curthread) +#define RW_LOCK_HELD(x) rw_lock_held(x) + +#undef RW_LOCK_HELD +#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) + +extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); +extern void rw_destroy(krwlock_t *rwlp); +extern void rw_enter(krwlock_t *rwlp, krw_t rw); +extern int rw_tryenter(krwlock_t *rwlp, krw_t rw); +extern int rw_tryupgrade(krwlock_t *rwlp); +extern void rw_exit(krwlock_t *rwlp); +extern int rw_lock_held(krwlock_t *rwlp); +#define rw_downgrade(rwlp) do { } while (0) + +extern uid_t crgetuid(cred_t *cr); +extern uid_t crgetruid(cred_t *cr); +extern gid_t crgetgid(cred_t *cr); +extern int crgetngroups(cred_t *cr); +extern gid_t *crgetgroups(cred_t *cr); + +/* + * Condition variables + */ +typedef cond_t kcondvar_t; + +#define CV_DEFAULT USYNC_THREAD + +extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); +extern void cv_destroy(kcondvar_t *cv); +extern void cv_wait(kcondvar_t *cv, kmutex_t 
*mp); +extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); +extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, + hrtime_t res, int flag); +extern void cv_signal(kcondvar_t *cv); +extern void cv_broadcast(kcondvar_t *cv); + +/* + * Thread-specific data + */ +#define tsd_get(k) pthread_getspecific(k) +#define tsd_set(k, v) pthread_setspecific(k, v) +#define tsd_create(kp, d) pthread_key_create(kp, d) +#define tsd_destroy(kp) /* nothing */ + +/* + * Kernel memory + */ +#define KM_SLEEP UMEM_NOFAIL +#define KM_PUSHPAGE KM_SLEEP +#define KM_NOSLEEP UMEM_DEFAULT +#define KMC_NODEBUG UMC_NODEBUG +#define KMC_NOTOUCH 0 /* not needed for userland caches */ +#define KM_NODEBUG 0 +#define kmem_alloc(_s, _f) umem_alloc(_s, _f) +#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) +#define kmem_free(_b, _s) umem_free(_b, _s) +#define kmem_size() (physmem * PAGESIZE) +#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ + umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) +#define kmem_cache_destroy(_c) umem_cache_destroy(_c) +#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) +#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) +#define kmem_debugging() 0 +#define kmem_cache_reap_now(_c) /* nothing */ +#define kmem_cache_set_move(_c, _cb) /* nothing */ +#define POINTER_INVALIDATE(_pp) /* nothing */ +#define POINTER_IS_VALID(_p) 0 + +typedef umem_cache_t kmem_cache_t; + +typedef enum kmem_cbrc { + KMEM_CBRC_YES, + KMEM_CBRC_NO, + KMEM_CBRC_LATER, + KMEM_CBRC_DONT_NEED, + KMEM_CBRC_DONT_KNOW +} kmem_cbrc_t; + +/* + * Task queues + */ +typedef struct taskq taskq_t; +typedef uintptr_t taskqid_t; +typedef void (task_func_t)(void *); + +typedef struct taskq_ent { + struct taskq_ent *tqent_next; + struct taskq_ent *tqent_prev; + task_func_t *tqent_func; + void *tqent_arg; + uintptr_t tqent_flags; +} taskq_ent_t; + +#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */ + +#define TASKQ_PREPOPULATE 0x0001 
+#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ +#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ +#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */ +#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */ + +#define TQ_SLEEP KM_SLEEP /* Can block for memory */ +#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ +#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ +#define TQ_FRONT 0x08 /* Queue in front */ + + +extern taskq_t *system_taskq; + +extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); +#define taskq_create_proc(a, b, c, d, e, p, f) \ + (taskq_create(a, b, c, d, e, f)) +#define taskq_create_sysdc(a, b, d, e, p, dc, f) \ + (taskq_create(a, b, maxclsyspri, d, e, f)) +extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); +extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t, + taskq_ent_t *); +extern void taskq_destroy(taskq_t *); +extern void taskq_wait(taskq_t *); +extern int taskq_member(taskq_t *, void *); +extern void system_taskq_init(void); +extern void system_taskq_fini(void); + +#define taskq_dispatch_safe(tq, func, arg, flags, task) \ + taskq_dispatch((tq), (func), (arg), (flags)) + +#define XVA_MAPSIZE 3 +#define XVA_MAGIC 0x78766174 + +/* + * vnodes + */ +typedef struct vnode { + uint64_t v_size; + int v_fd; + char *v_path; + int v_dump_fd; +} vnode_t; + +extern char *vn_dumpdir; +#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ + +typedef struct xoptattr { + timestruc_t xoa_createtime; /* Create time of file */ + uint8_t xoa_archive; + uint8_t xoa_system; + uint8_t xoa_readonly; + uint8_t xoa_hidden; + uint8_t xoa_nounlink; + uint8_t xoa_immutable; + uint8_t xoa_appendonly; + uint8_t xoa_nodump; + uint8_t xoa_settable; + uint8_t xoa_opaque; + uint8_t xoa_av_quarantined; + uint8_t xoa_av_modified; + uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; + uint8_t xoa_reparse; + uint8_t xoa_offline; + uint8_t 
xoa_sparse; +} xoptattr_t; + +typedef struct vattr { + uint_t va_mask; /* bit-mask of attributes */ + u_offset_t va_size; /* file size in bytes */ +} vattr_t; + + +typedef struct xvattr { + vattr_t xva_vattr; /* Embedded vattr structure */ + uint32_t xva_magic; /* Magic Number */ + uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ + uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ + uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ + uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ + xoptattr_t xva_xoptattrs; /* Optional attributes */ +} xvattr_t; + +typedef struct vsecattr { + uint_t vsa_mask; /* See below */ + int vsa_aclcnt; /* ACL entry count */ + void *vsa_aclentp; /* pointer to ACL entries */ + int vsa_dfaclcnt; /* default ACL entry count */ + void *vsa_dfaclentp; /* pointer to default ACL entries */ + size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ +} vsecattr_t; + +#define AT_TYPE 0x00001 +#define AT_MODE 0x00002 +#define AT_UID 0x00004 +#define AT_GID 0x00008 +#define AT_FSID 0x00010 +#define AT_NODEID 0x00020 +#define AT_NLINK 0x00040 +#define AT_SIZE 0x00080 +#define AT_ATIME 0x00100 +#define AT_MTIME 0x00200 +#define AT_CTIME 0x00400 +#define AT_RDEV 0x00800 +#define AT_BLKSIZE 0x01000 +#define AT_NBLOCKS 0x02000 +#define AT_SEQ 0x08000 +#define AT_XVATTR 0x10000 + +#define CRCREAT 0 + +extern int fop_getattr(vnode_t *vp, vattr_t *vap); + +#define VOP_CLOSE(vp, f, c, o, cr, ct) 0 +#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 +#define VOP_GETATTR(vp, vap, cr) fop_getattr((vp), (vap)); + +#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) + +#define VN_RELE(vp) vn_close(vp, 0, NULL, NULL) +#define VN_RELE_ASYNC(vp, taskq) vn_close(vp, 0, NULL, NULL) + +#define vn_lock(vp, type) +#define VOP_UNLOCK(vp, type) + +extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, + int x2, int x3); +extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, + int x2, int x3, 
vnode_t *vp, int fd); +extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, + offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); +extern void vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td); + +#define vn_remove(path, x1, x2) remove(path) +#define vn_rename(from, to, seg) rename((from), (to)) +#define vn_is_readonly(vp) B_FALSE + +extern vnode_t *rootdir; + +#include <sys/file.h> /* for FREAD, FWRITE, etc */ +#define FTRUNC O_TRUNC + +/* + * Random stuff + */ +#define ddi_get_lbolt() (gethrtime() >> 23) +#define ddi_get_lbolt64() (gethrtime() >> 23) +#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */ + +extern void delay(clock_t ticks); + +#define SEC_TO_TICK(sec) ((sec) * hz) +#define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz)) + +#define gethrestime_sec() time(NULL) +#define gethrestime(t) \ + do {\ + (t)->tv_sec = gethrestime_sec();\ + (t)->tv_nsec = 0;\ + } while (0); + +#define max_ncpus 64 + +#define minclsyspri 60 +#define maxclsyspri 99 + +#define CPU_SEQID (thr_self() & (max_ncpus - 1)) + +#define kcred NULL +#define CRED() NULL + +#ifndef ptob +#define ptob(x) ((x) * PAGESIZE) +#endif + +extern uint64_t physmem; + +extern int highbit64(uint64_t i); +extern int random_get_bytes(uint8_t *ptr, size_t len); +extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); + +extern void kernel_init(int); +extern void kernel_fini(void); + +struct spa; +extern void nicenum(uint64_t num, char *buf); +extern void show_pool_stats(struct spa *); + +typedef struct callb_cpr { + kmutex_t *cc_lockp; +} callb_cpr_t; + +#define CALLB_CPR_INIT(cp, lockp, func, name) { \ + (cp)->cc_lockp = lockp; \ +} + +#define CALLB_CPR_SAFE_BEGIN(cp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ +} + +#define CALLB_CPR_SAFE_END(cp, lockp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ +} + +#define CALLB_CPR_EXIT(cp) { \ + ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ + mutex_exit((cp)->cc_lockp); \ +} + +#define 
zone_dataset_visible(x, y) (1) +#define INGLOBALZONE(z) (1) + +extern char *kmem_asprintf(const char *fmt, ...); +#define strfree(str) kmem_free((str), strlen(str) + 1) + +/* + * Hostname information + */ +extern struct utsname utsname; +extern char hw_serial[]; /* for userland-emulated hostid access */ +extern int ddi_strtoul(const char *str, char **nptr, int base, + unsigned long *result); + +extern int ddi_strtoull(const char *str, char **nptr, int base, + u_longlong_t *result); + +/* ZFS Boot Related stuff. */ + +struct _buf { + intptr_t _fd; +}; + +struct bootstat { + uint64_t st_size; +}; + +typedef struct ace_object { + uid_t a_who; + uint32_t a_access_mask; + uint16_t a_flags; + uint16_t a_type; + uint8_t a_obj_type[16]; + uint8_t a_inherit_obj_type[16]; +} ace_object_t; + + +#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 +#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 +#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 +#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 + +extern struct _buf *kobj_open_file(char *name); +extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, + unsigned off); +extern void kobj_close_file(struct _buf *file); +extern int kobj_get_filesize(struct _buf *file, uint64_t *size); +extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); +extern int zfs_secpolicy_rename_perms(const char *from, const char *to, + cred_t *cr); +extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); +extern zoneid_t getzoneid(void); +/* Random compatibility stuff. 
*/ +#define lbolt (gethrtime() >> 23) +#define lbolt64 (gethrtime() >> 23) + +extern uint64_t physmem; + +#define gethrestime_sec() time(NULL) + +#define pwrite64(d, p, n, o) pwrite(d, p, n, o) +#define readdir64(d) readdir(d) +#define SIGPENDING(td) (0) +#define root_mount_wait() do { } while (0) +#define root_mounted() (1) + +struct file { + void *dummy; +}; + +#define FCREAT O_CREAT +#define FOFFMAX 0x0 + +/* SID stuff */ +typedef struct ksiddomain { + uint_t kd_ref; + uint_t kd_len; + char *kd_name; +} ksiddomain_t; + +ksiddomain_t *ksid_lookupdomain(const char *); +void ksiddomain_rele(ksiddomain_t *); + +typedef uint32_t idmap_rid_t; + +#define DDI_SLEEP KM_SLEEP +#define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) (0) + +#define SX_SYSINIT(name, lock, desc) + +#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ + intptr_t arg2, struct sysctl_req *req + +/* + * This describes the access space for a sysctl request. This is needed + * so that we can use the interface from the kernel or from user-space. + */ +struct sysctl_req { + struct thread *td; /* used for access checking */ + int lock; /* wiring state */ + void *oldptr; + size_t oldlen; + size_t oldidx; + int (*oldfunc)(struct sysctl_req *, const void *, size_t); + void *newptr; + size_t newlen; + size_t newidx; + int (*newfunc)(struct sysctl_req *, void *, size_t); + size_t validlen; + int flags; +}; + +SLIST_HEAD(sysctl_oid_list, sysctl_oid); + +/* + * This describes one "oid" in the MIB tree. Potentially more nodes can + * be hidden behind it, expanded by the handler. + */ +struct sysctl_oid { + struct sysctl_oid_list *oid_parent; + SLIST_ENTRY(sysctl_oid) oid_link; + int oid_number; + u_int oid_kind; + void *oid_arg1; + intptr_t oid_arg2; + const char *oid_name; + int (*oid_handler)(SYSCTL_HANDLER_ARGS); + const char *oid_fmt; + int oid_refcnt; + u_int oid_running; + const char *oid_descr; +}; + +#define SYSCTL_DECL(...) +#define SYSCTL_NODE(...) +#define SYSCTL_INT(...) 
+#define SYSCTL_UINT(...) +#define SYSCTL_ULONG(...) +#define SYSCTL_PROC(...) +#define SYSCTL_QUAD(...) +#define SYSCTL_UQUAD(...) +#ifdef TUNABLE_INT +#undef TUNABLE_INT +#undef TUNABLE_ULONG +#undef TUNABLE_QUAD +#endif +#define TUNABLE_INT(...) +#define TUNABLE_ULONG(...) +#define TUNABLE_QUAD(...) + +int sysctl_handle_64(SYSCTL_HANDLER_ARGS); + +/* Errors */ + +#ifndef ERESTART +#define ERESTART (-1) +#endif + +#ifdef illumos +/* + * Cyclic information + */ +extern kmutex_t cpu_lock; + +typedef uintptr_t cyclic_id_t; +typedef uint16_t cyc_level_t; +typedef void (*cyc_func_t)(void *); + +#define CY_LOW_LEVEL 0 +#define CY_INFINITY INT64_MAX +#define CYCLIC_NONE ((cyclic_id_t)0) + +typedef struct cyc_time { + hrtime_t cyt_when; + hrtime_t cyt_interval; +} cyc_time_t; + +typedef struct cyc_handler { + cyc_func_t cyh_func; + void *cyh_arg; + cyc_level_t cyh_level; +} cyc_handler_t; + +extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *); +extern void cyclic_remove(cyclic_id_t); +extern int cyclic_reprogram(cyclic_id_t, hrtime_t); +#endif /* illumos */ + +#ifdef illumos +/* + * Buf structure + */ +#define B_BUSY 0x0001 +#define B_DONE 0x0002 +#define B_ERROR 0x0004 +#define B_READ 0x0040 /* read when I/O occurs */ +#define B_WRITE 0x0100 /* non-read pseudo-flag */ + +typedef struct buf { + int b_flags; + size_t b_bcount; + union { + caddr_t b_addr; + } b_un; + + lldaddr_t _b_blkno; +#define b_lblkno _b_blkno._f + size_t b_resid; + size_t b_bufsize; + int (*b_iodone)(struct buf *); + int b_error; + void *b_private; +} buf_t; + +extern void bioinit(buf_t *); +extern void biodone(buf_t *); +extern void bioerror(buf_t *, int); +extern int geterror(buf_t *); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFS_CONTEXT_H */ diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c new file mode 100644 index 0000000..26d9f36 --- /dev/null +++ 
b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c @@ -0,0 +1,342 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. + * Copyright (c) 2014 by Delphix. All rights reserved. 
+ */ + +#include <sys/zfs_context.h> + +int taskq_now; +taskq_t *system_taskq; + +#define TASKQ_ACTIVE 0x00010000 +#define TASKQ_NAMELEN 31 + +struct taskq { + char tq_name[TASKQ_NAMELEN + 1]; + kmutex_t tq_lock; + krwlock_t tq_threadlock; + kcondvar_t tq_dispatch_cv; + kcondvar_t tq_wait_cv; + thread_t *tq_threadlist; + int tq_flags; + int tq_active; + int tq_nthreads; + int tq_nalloc; + int tq_minalloc; + int tq_maxalloc; + kcondvar_t tq_maxalloc_cv; + int tq_maxalloc_wait; + taskq_ent_t *tq_freelist; + taskq_ent_t tq_task; +}; + +static taskq_ent_t * +task_alloc(taskq_t *tq, int tqflags) +{ + taskq_ent_t *t; + int rv; + +again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { + tq->tq_freelist = t->tqent_next; + } else { + if (tq->tq_nalloc >= tq->tq_maxalloc) { + if (!(tqflags & KM_SLEEP)) + return (NULL); + + /* + * We don't want to exceed tq_maxalloc, but we can't + * wait for other tasks to complete (and thus free up + * task structures) without risking deadlock with + * the caller. So, we just delay for one second + * to throttle the allocation rate. If we have tasks + * complete before one second timeout expires then + * taskq_ent_free will signal us and we will + * immediately retry the allocation. 
+ */ + tq->tq_maxalloc_wait++; + rv = cv_timedwait(&tq->tq_maxalloc_cv, + &tq->tq_lock, ddi_get_lbolt() + hz); + tq->tq_maxalloc_wait--; + if (rv > 0) + goto again; /* signaled */ + } + mutex_exit(&tq->tq_lock); + + t = kmem_alloc(sizeof (taskq_ent_t), tqflags & KM_SLEEP); + + mutex_enter(&tq->tq_lock); + if (t != NULL) + tq->tq_nalloc++; + } + return (t); +} + +static void +task_free(taskq_t *tq, taskq_ent_t *t) +{ + if (tq->tq_nalloc <= tq->tq_minalloc) { + t->tqent_next = tq->tq_freelist; + tq->tq_freelist = t; + } else { + tq->tq_nalloc--; + mutex_exit(&tq->tq_lock); + kmem_free(t, sizeof (taskq_ent_t)); + mutex_enter(&tq->tq_lock); + } + + if (tq->tq_maxalloc_wait) + cv_signal(&tq->tq_maxalloc_cv); +} + +taskqid_t +taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) +{ + taskq_ent_t *t; + + if (taskq_now) { + func(arg); + return (1); + } + + mutex_enter(&tq->tq_lock); + ASSERT(tq->tq_flags & TASKQ_ACTIVE); + if ((t = task_alloc(tq, tqflags)) == NULL) { + mutex_exit(&tq->tq_lock); + return (0); + } + if (tqflags & TQ_FRONT) { + t->tqent_next = tq->tq_task.tqent_next; + t->tqent_prev = &tq->tq_task; + } else { + t->tqent_next = &tq->tq_task; + t->tqent_prev = tq->tq_task.tqent_prev; + } + t->tqent_next->tqent_prev = t; + t->tqent_prev->tqent_next = t; + t->tqent_func = func; + t->tqent_arg = arg; + t->tqent_flags = 0; + cv_signal(&tq->tq_dispatch_cv); + mutex_exit(&tq->tq_lock); + return (1); +} + +void +taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, + taskq_ent_t *t) +{ + ASSERT(func != NULL); + ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC)); + + /* + * Mark it as a prealloc'd task. This is important + * to ensure that we don't free it later. + */ + t->tqent_flags |= TQENT_FLAG_PREALLOC; + /* + * Enqueue the task to the underlying queue. 
+ */ + mutex_enter(&tq->tq_lock); + + if (flags & TQ_FRONT) { + t->tqent_next = tq->tq_task.tqent_next; + t->tqent_prev = &tq->tq_task; + } else { + t->tqent_next = &tq->tq_task; + t->tqent_prev = tq->tq_task.tqent_prev; + } + t->tqent_next->tqent_prev = t; + t->tqent_prev->tqent_next = t; + t->tqent_func = func; + t->tqent_arg = arg; + cv_signal(&tq->tq_dispatch_cv); + mutex_exit(&tq->tq_lock); +} + +void +taskq_wait(taskq_t *tq) +{ + mutex_enter(&tq->tq_lock); + while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + mutex_exit(&tq->tq_lock); +} + +static void * +taskq_thread(void *arg) +{ + taskq_t *tq = arg; + taskq_ent_t *t; + boolean_t prealloc; + + mutex_enter(&tq->tq_lock); + while (tq->tq_flags & TASKQ_ACTIVE) { + if ((t = tq->tq_task.tqent_next) == &tq->tq_task) { + if (--tq->tq_active == 0) + cv_broadcast(&tq->tq_wait_cv); + cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock); + tq->tq_active++; + continue; + } + t->tqent_prev->tqent_next = t->tqent_next; + t->tqent_next->tqent_prev = t->tqent_prev; + t->tqent_next = NULL; + t->tqent_prev = NULL; + prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC; + mutex_exit(&tq->tq_lock); + + rw_enter(&tq->tq_threadlock, RW_READER); + t->tqent_func(t->tqent_arg); + rw_exit(&tq->tq_threadlock); + + mutex_enter(&tq->tq_lock); + if (!prealloc) + task_free(tq, t); + } + tq->tq_nthreads--; + cv_broadcast(&tq->tq_wait_cv); + mutex_exit(&tq->tq_lock); + return (NULL); +} + +/*ARGSUSED*/ +taskq_t * +taskq_create(const char *name, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); + int t; + + if (flags & TASKQ_THREADS_CPU_PCT) { + int pct; + ASSERT3S(nthreads, >=, 0); + ASSERT3S(nthreads, <=, 100); + pct = MIN(nthreads, 100); + pct = MAX(pct, 0); + + nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100; + nthreads = MAX(nthreads, 1); /* need at least 1 thread */ + } else { + ASSERT3S(nthreads, >=, 1); 
+ } + + rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); + mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); + (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); + tq->tq_flags = flags | TASKQ_ACTIVE; + tq->tq_active = nthreads; + tq->tq_nthreads = nthreads; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_task.tqent_next = &tq->tq_task; + tq->tq_task.tqent_prev = &tq->tq_task; + tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP); + + if (flags & TASKQ_PREPOPULATE) { + mutex_enter(&tq->tq_lock); + while (minalloc-- > 0) + task_free(tq, task_alloc(tq, KM_SLEEP)); + mutex_exit(&tq->tq_lock); + } + + for (t = 0; t < nthreads; t++) + (void) thr_create(0, 0, taskq_thread, + tq, THR_BOUND, &tq->tq_threadlist[t]); + + return (tq); +} + +void +taskq_destroy(taskq_t *tq) +{ + int t; + int nthreads = tq->tq_nthreads; + + taskq_wait(tq); + + mutex_enter(&tq->tq_lock); + + tq->tq_flags &= ~TASKQ_ACTIVE; + cv_broadcast(&tq->tq_dispatch_cv); + + while (tq->tq_nthreads != 0) + cv_wait(&tq->tq_wait_cv, &tq->tq_lock); + + tq->tq_minalloc = 0; + while (tq->tq_nalloc != 0) { + ASSERT(tq->tq_freelist != NULL); + task_free(tq, task_alloc(tq, KM_SLEEP)); + } + + mutex_exit(&tq->tq_lock); + + for (t = 0; t < nthreads; t++) + (void) thr_join(tq->tq_threadlist[t], NULL, NULL); + + kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t)); + + rw_destroy(&tq->tq_threadlock); + mutex_destroy(&tq->tq_lock); + cv_destroy(&tq->tq_dispatch_cv); + cv_destroy(&tq->tq_wait_cv); + cv_destroy(&tq->tq_maxalloc_cv); + + kmem_free(tq, sizeof (taskq_t)); +} + +int +taskq_member(taskq_t *tq, void *t) +{ + int i; + + if (taskq_now) + return (1); + + for (i = 0; i < tq->tq_nthreads; i++) + if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t) + return (1); + + return (0); +} + +void 
+system_taskq_init(void) +{ + system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, + TASKQ_DYNAMIC | TASKQ_PREPOPULATE); +} + +void +system_taskq_fini(void) +{ + taskq_destroy(system_taskq); + system_taskq = NULL; /* defensive */ +} diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/util.c b/cddl/contrib/opensolaris/lib/libzpool/common/util.c new file mode 100644 index 0000000..9b99531 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzpool/common/util.c @@ -0,0 +1,155 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <assert.h> +#include <sys/zfs_context.h> +#include <sys/avl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/spa.h> +#include <sys/fs/zfs.h> +#include <sys/refcount.h> + +/* + * Routines needed by more than one client of libzpool. 
+ */ + +void +nicenum(uint64_t num, char *buf) +{ + uint64_t n = num; + int index = 0; + char u; + + while (n >= 1024) { + n = (n + (1024 / 2)) / 1024; /* Round up or down */ + index++; + } + + u = " KMGTPE"[index]; + + if (index == 0) { + (void) sprintf(buf, "%llu", (u_longlong_t)n); + } else if (n < 10 && (num & (num - 1)) != 0) { + (void) sprintf(buf, "%.2f%c", + (double)num / (1ULL << 10 * index), u); + } else if (n < 100 && (num & (num - 1)) != 0) { + (void) sprintf(buf, "%.1f%c", + (double)num / (1ULL << 10 * index), u); + } else { + (void) sprintf(buf, "%llu%c", (u_longlong_t)n, u); + } +} + +static void +show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent) +{ + vdev_stat_t *vs; + vdev_stat_t v0 = { 0 }; + uint64_t sec; + uint64_t is_log = 0; + nvlist_t **child; + uint_t c, children; + char used[6], avail[6]; + char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6]; + char *prefix = ""; + + if (indent == 0 && desc != NULL) { + (void) printf(" " + " capacity operations bandwidth ---- errors ----\n"); + (void) printf("description " + "used avail read write read write read write cksum\n"); + } + + if (desc != NULL) { + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); + + if (is_log) + prefix = "log "; + + if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) != 0) + vs = &v0; + + sec = MAX(1, vs->vs_timestamp / NANOSEC); + + nicenum(vs->vs_alloc, used); + nicenum(vs->vs_space - vs->vs_alloc, avail); + nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops); + nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops); + nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes); + nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes); + nicenum(vs->vs_read_errors, rerr); + nicenum(vs->vs_write_errors, werr); + nicenum(vs->vs_checksum_errors, cerr); + + (void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n", + indent, "", + prefix, + indent + strlen(prefix) - 25 - (vs->vs_space ? 
0 : 12), + desc, + vs->vs_space ? 6 : 0, vs->vs_space ? used : "", + vs->vs_space ? 6 : 0, vs->vs_space ? avail : "", + rops, wops, rbytes, wbytes, rerr, werr, cerr); + } + + if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + nvlist_t *cnv = child[c]; + char *cname, *tname; + uint64_t np; + if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && + nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) + cname = "<unknown>"; + tname = calloc(1, strlen(cname) + 2); + (void) strcpy(tname, cname); + if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0) + tname[strlen(tname)] = '0' + np; + show_vdev_stats(tname, ctype, cnv, indent + 2); + free(tname); + } +} + +void +show_pool_stats(spa_t *spa) +{ + nvlist_t *config, *nvroot; + char *name; + + VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0); + + VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, + &name) == 0); + + show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0); + show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0); + show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0); + + nvlist_free(config); +} diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d b/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d new file mode 100644 index 0000000..1351733 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +provider zfs { + probe probe0(char *probename); + probe probe1(char *probename, unsigned long arg1); + probe probe2(char *probename, unsigned long arg1, unsigned long arg2); + probe probe3(char *probename, unsigned long arg1, unsigned long arg2, + unsigned long arg3); + probe probe4(char *probename, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4); + + probe set__error(int err); +}; + +#pragma D attributes Evolving/Evolving/ISA provider zfs provider +#pragma D attributes Private/Private/Unknown provider zfs module +#pragma D attributes Private/Private/Unknown provider zfs function +#pragma D attributes Evolving/Evolving/ISA provider zfs name +#pragma D attributes Evolving/Evolving/ISA provider zfs args diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py b/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py new file mode 100644 index 0000000..76b0998 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py @@ -0,0 +1,27 @@ +#! /usr/bin/python2.6 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. +# + +""" +package which provides an administrative interface to ZFS +""" diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py b/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py new file mode 100644 index 0000000..7ad4b49 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py @@ -0,0 +1,398 @@ +#! /usr/bin/python2.6 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +"""This module implements the "zfs allow" and "zfs unallow" subcommands. 
+The only public interface is the zfs.allow.do_allow() function.""" + +import zfs.util +import zfs.dataset +import optparse +import sys +import pwd +import grp +import errno + +_ = zfs.util._ + +class FSPerms(object): + """This class represents all the permissions that are set on a + particular filesystem (not including those inherited).""" + + __slots__ = "create", "sets", "local", "descend", "ld" + __repr__ = zfs.util.default_repr + + def __init__(self, raw): + """Create a FSPerms based on the dict of raw permissions + from zfs.ioctl.get_fsacl().""" + # set of perms + self.create = set() + + # below are { "Ntype name": set(perms) } + # where N is a number that we just use for sorting, + # type is "user", "group", "everyone", or "" (for sets) + # name is a user, group, or set name, or "" (for everyone) + self.sets = dict() + self.local = dict() + self.descend = dict() + self.ld = dict() + + # see the comment in dsl_deleg.c for the definition of whokey + for whokey in raw.keys(): + perms = raw[whokey].keys() + whotypechr = whokey[0].lower() + ws = whokey[3:] + if whotypechr == "c": + self.create.update(perms) + elif whotypechr == "s": + nwho = "1" + ws + self.sets.setdefault(nwho, set()).update(perms) + else: + if whotypechr == "u": + try: + name = pwd.getpwuid(int(ws)).pw_name + except KeyError: + name = ws + nwho = "1user " + name + elif whotypechr == "g": + try: + name = grp.getgrgid(int(ws)).gr_name + except KeyError: + name = ws + nwho = "2group " + name + elif whotypechr == "e": + nwho = "3everyone" + else: + raise ValueError(whotypechr) + + if whokey[1] == "l": + d = self.local + elif whokey[1] == "d": + d = self.descend + else: + raise ValueError(whokey[1]) + + d.setdefault(nwho, set()).update(perms) + + # Find perms that are in both local and descend, and + # move them to ld. 
+ for nwho in self.local: + if nwho not in self.descend: + continue + # note: these are set operations + self.ld[nwho] = self.local[nwho] & self.descend[nwho] + self.local[nwho] -= self.ld[nwho] + self.descend[nwho] -= self.ld[nwho] + + @staticmethod + def __ldstr(d, header): + s = "" + for (nwho, perms) in sorted(d.items()): + # local and descend may have entries where perms + # is an empty set, due to consolidating all + # permissions into ld + if perms: + s += "\t%s %s\n" % \ + (nwho[1:], ",".join(sorted(perms))) + if s: + s = header + s + return s + + def __str__(self): + s = self.__ldstr(self.sets, _("Permission sets:\n")) + + if self.create: + s += _("Create time permissions:\n") + s += "\t%s\n" % ",".join(sorted(self.create)) + + s += self.__ldstr(self.local, _("Local permissions:\n")) + s += self.__ldstr(self.descend, _("Descendent permissions:\n")) + s += self.__ldstr(self.ld, _("Local+Descendent permissions:\n")) + return s.rstrip() + +def args_to_perms(parser, options, who, perms): + """Return a dict of raw perms {"whostr" -> {"perm" -> None}} + based on the command-line input.""" + + # perms is not set if we are doing a "zfs unallow <who> <fs>" to + # remove all of someone's permissions + if perms: + setperms = dict(((p, None) for p in perms if p[0] == "@")) + baseperms = dict(((canonicalized_perm(p), None) + for p in perms if p[0] != "@")) + else: + setperms = None + baseperms = None + + d = dict() + + def storeperm(typechr, inheritchr, arg): + assert typechr in "ugecs" + assert inheritchr in "ld-" + + def mkwhokey(t): + return "%c%c$%s" % (t, inheritchr, arg) + + if baseperms or not perms: + d[mkwhokey(typechr)] = baseperms + if setperms or not perms: + d[mkwhokey(typechr.upper())] = setperms + + def decodeid(w, toidfunc, fmt): + try: + return int(w) + except ValueError: + try: + return toidfunc(w)[2] + except KeyError: + parser.error(fmt % w) + + if options.set: + storeperm("s", "-", who) + elif options.create: + storeperm("c", "-", "") + else: + for 
w in who: + if options.user: + id = decodeid(w, pwd.getpwnam, + _("invalid user %s")) + typechr = "u" + elif options.group: + id = decodeid(w, grp.getgrnam, + _("invalid group %s")) + typechr = "g" + elif w == "everyone": + id = "" + typechr = "e" + else: + try: + id = pwd.getpwnam(w)[2] + typechr = "u" + except KeyError: + try: + id = grp.getgrnam(w)[2] + typechr = "g" + except KeyError: + parser.error(_("invalid user/group %s") % w) + if options.local: + storeperm(typechr, "l", id) + if options.descend: + storeperm(typechr, "d", id) + return d + +perms_subcmd = dict( + create=_("Must also have the 'mount' ability"), + destroy=_("Must also have the 'mount' ability"), + snapshot="", + rollback="", + clone=_("""Must also have the 'create' ability and 'mount' +\t\t\t\tability in the origin file system"""), + promote=_("""Must also have the 'mount' +\t\t\t\tand 'promote' ability in the origin file system"""), + rename=_("""Must also have the 'mount' and 'create' +\t\t\t\tability in the new parent"""), + receive=_("Must also have the 'mount' and 'create' ability"), + allow=_("Must also have the permission that is being\n\t\t\t\tallowed"), + mount=_("Allows mount/umount of ZFS datasets"), + share=_("Allows sharing file systems over NFS or SMB\n\t\t\t\tprotocols"), + send="", + hold=_("Allows adding a user hold to a snapshot"), + release=_("Allows releasing a user hold which\n\t\t\t\tmight destroy the snapshot"), + diff=_("Allows lookup of paths within a dataset,\n\t\t\t\tgiven an object number. Ordinary users need this\n\t\t\t\tin order to use zfs diff"), + bookmark="", +) + +perms_other = dict( + userprop=_("Allows changing any user property"), + userquota=_("Allows accessing any userquota@... property"), + groupquota=_("Allows accessing any groupquota@... property"), + userused=_("Allows reading any userused@... property"), + groupused=_("Allows reading any groupused@... 
property"), +) + +def hasset(ds, setname): + """Return True if the given setname (string) is defined for this + ds (Dataset).""" + # It would be nice to cache the result of get_fsacl(). + for raw in ds.get_fsacl().values(): + for whokey in raw.keys(): + if whokey[0].lower() == "s" and whokey[3:] == setname: + return True + return False + +def canonicalized_perm(permname): + """Return the canonical name (string) for this permission (string). + Raises ZFSError if it is not a valid permission.""" + if permname in perms_subcmd.keys() or permname in perms_other.keys(): + return permname + try: + return zfs.dataset.getpropobj(permname).name + except KeyError: + raise zfs.util.ZFSError(errno.EINVAL, permname, + _("invalid permission")) + +def print_perms(): + """Print the set of supported permissions.""" + print(_("\nThe following permissions are supported:\n")) + fmt = "%-16s %-14s\t%s" + print(fmt % (_("NAME"), _("TYPE"), _("NOTES"))) + + for (name, note) in sorted(perms_subcmd.iteritems()): + print(fmt % (name, _("subcommand"), note)) + + for (name, note) in sorted(perms_other.iteritems()): + print(fmt % (name, _("other"), note)) + + for (name, prop) in sorted(zfs.dataset.proptable.iteritems()): + if prop.visible and prop.delegatable(): + print(fmt % (name, _("property"), "")) + +def do_allow(): + """Implements the "zfs allow" and "zfs unallow" subcommands.""" + un = (sys.argv[1] == "unallow") + + def usage(msg=None): + parser.print_help() + print_perms() + if msg: + print + parser.exit("zfs: error: " + msg) + else: + parser.exit() + + if un: + u = _("""unallow [-rldug] <"everyone"|user|group>[,...] 
+ [<perm|@setname>[,...]] <filesystem|volume> + unallow [-rld] -e [<perm|@setname>[,...]] <filesystem|volume> + unallow [-r] -c [<perm|@setname>[,...]] <filesystem|volume> + unallow [-r] -s @setname [<perm|@setname>[,...]] <filesystem|volume>""") + verb = _("remove") + sstr = _("undefine permission set") + else: + u = _("""allow <filesystem|volume> + allow [-ldug] <"everyone"|user|group>[,...] <perm|@setname>[,...] + <filesystem|volume> + allow [-ld] -e <perm|@setname>[,...] <filesystem|volume> + allow -c <perm|@setname>[,...] <filesystem|volume> + allow -s @setname <perm|@setname>[,...] <filesystem|volume>""") + verb = _("set") + sstr = _("define permission set") + + parser = optparse.OptionParser(usage=u, prog="zfs") + + parser.add_option("-l", action="store_true", dest="local", + help=_("%s permission locally") % verb) + parser.add_option("-d", action="store_true", dest="descend", + help=_("%s permission for descendents") % verb) + parser.add_option("-u", action="store_true", dest="user", + help=_("%s permission for user") % verb) + parser.add_option("-g", action="store_true", dest="group", + help=_("%s permission for group") % verb) + parser.add_option("-e", action="store_true", dest="everyone", + help=_("%s permission for everyone") % verb) + parser.add_option("-c", action="store_true", dest="create", + help=_("%s create time permissions") % verb) + parser.add_option("-s", action="store_true", dest="set", help=sstr) + if un: + parser.add_option("-r", action="store_true", dest="recursive", + help=_("remove permissions recursively")) + + if len(sys.argv) == 3 and not un: + # just print the permissions on this fs + + if sys.argv[2] == "-h": + # hack to make "zfs allow -h" work + usage() + ds = zfs.dataset.Dataset(sys.argv[2], snaps=False) + + p = dict() + for (fs, raw) in ds.get_fsacl().items(): + p[fs] = FSPerms(raw) + + for fs in sorted(p.keys(), reverse=True): + s = _("---- Permissions on %s ") % fs + print(s + "-" * (70-len(s))) + print(p[fs]) + return + + + 
(options, args) = parser.parse_args(sys.argv[2:]) + + if sum((bool(options.everyone), bool(options.user), + bool(options.group))) > 1: + parser.error(_("-u, -g, and -e are mutually exclusive")) + + def mungeargs(expected_len): + if un and len(args) == expected_len-1: + return (None, args[expected_len-2]) + elif len(args) == expected_len: + return (args[expected_len-2].split(","), + args[expected_len-1]) + else: + usage(_("wrong number of parameters")) + + if options.set: + if options.local or options.descend or options.user or \ + options.group or options.everyone or options.create: + parser.error(_("invalid option combined with -s")) + if args[0][0] != "@": + parser.error(_("invalid set name: missing '@' prefix")) + + (perms, fsname) = mungeargs(3) + who = args[0] + elif options.create: + if options.local or options.descend or options.user or \ + options.group or options.everyone or options.set: + parser.error(_("invalid option combined with -c")) + + (perms, fsname) = mungeargs(2) + who = None + elif options.everyone: + if options.user or options.group or \ + options.create or options.set: + parser.error(_("invalid option combined with -e")) + + (perms, fsname) = mungeargs(2) + who = ["everyone"] + else: + (perms, fsname) = mungeargs(3) + who = args[0].split(",") + + if not options.local and not options.descend: + options.local = True + options.descend = True + + d = args_to_perms(parser, options, who, perms) + + ds = zfs.dataset.Dataset(fsname, snaps=False) + + if not un and perms: + for p in perms: + if p[0] == "@" and not hasset(ds, p): + parser.error(_("set %s is not defined") % p) + + ds.set_fsacl(un, d) + if un and options.recursive: + for child in ds.descendents(): + child.set_fsacl(un, d) diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py b/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py new file mode 100644 index 0000000..26192e4 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py @@ -0,0 +1,234 @@ +#! 
/usr/bin/python2.6 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. +# + +"""Implements the Dataset class, providing methods for manipulating ZFS +datasets. Also implements the Property class, which describes ZFS +properties.""" + +import zfs.ioctl +import zfs.util +import errno + +_ = zfs.util._ + +class Property(object): + """This class represents a ZFS property. It contains + information about the property -- if it's readonly, a number vs + string vs index, etc. 
Only native properties are represented by + this class -- not user properties (eg "user:prop") or userspace + properties (eg "userquota@joe").""" + + __slots__ = "name", "number", "type", "default", "attr", "validtypes", \ + "values", "colname", "rightalign", "visible", "indextable" + __repr__ = zfs.util.default_repr + + def __init__(self, t): + """t is the tuple of information about this property + from zfs.ioctl.get_proptable, which should match the + members of zprop_desc_t (see zfs_prop.h).""" + + self.name = t[0] + self.number = t[1] + self.type = t[2] + if self.type == "string": + self.default = t[3] + else: + self.default = t[4] + self.attr = t[5] + self.validtypes = t[6] + self.values = t[7] + self.colname = t[8] + self.rightalign = t[9] + self.visible = t[10] + self.indextable = t[11] + + def delegatable(self): + """Return True if this property can be delegated with + "zfs allow".""" + return self.attr != "readonly" + +proptable = dict() +for name, t in zfs.ioctl.get_proptable().iteritems(): + proptable[name] = Property(t) +del name, t + +def getpropobj(name): + """Return the Property object that is identified by the given + name string. It can be the full name, or the column name.""" + try: + return proptable[name] + except KeyError: + for p in proptable.itervalues(): + if p.colname and p.colname.lower() == name: + return p + raise + +class Dataset(object): + """Represents a ZFS dataset (filesystem, snapshot, zvol, clone, etc). + + Generally, this class provides interfaces to the C functions in + zfs.ioctl which actually interface with the kernel to manipulate + datasets. + + Unless otherwise noted, any method can raise a ZFSError to + indicate failure.""" + + __slots__ = "name", "__props" + __repr__ = zfs.util.default_repr + + def __init__(self, name, props=None, + types=("filesystem", "volume"), snaps=True): + """Open the named dataset, checking that it exists and + is of the specified type. + + name is the string name of this dataset. 
+ + props is the property settings dict from zfs.ioctl.next_dataset. + + types is an iterable of strings specifying which types + of datasets are permitted. Accepted strings are + "filesystem" and "volume". Defaults to accepting all + types. + + snaps is a boolean specifying if snapshots are acceptable. + + Raises a ZFSError if the dataset can't be accessed (eg + doesn't exist) or is not of the specified type. + """ + + self.name = name + + e = zfs.util.ZFSError(errno.EINVAL, + _("cannot open %s") % name, + _("operation not applicable to datasets of this type")) + if "@" in name and not snaps: + raise e + if not props: + props = zfs.ioctl.dataset_props(name) + self.__props = props + if "volume" not in types and self.getprop("type") == 3: + raise e + if "filesystem" not in types and self.getprop("type") == 2: + raise e + + def getprop(self, propname): + """Return the value of the given property for this dataset. + + Currently only works for native properties (those with a + Property object.) + + Raises KeyError if propname does not specify a native property. + Does not raise ZFSError. + """ + + p = getpropobj(propname) + try: + return self.__props[p.name]["value"] + except KeyError: + return p.default + + def parent(self): + """Return a Dataset representing the parent of this one.""" + return Dataset(self.name[:self.name.rindex("/")]) + + def descendents(self): + """A generator function which iterates over all + descendent Datasets (not including snapshots.""" + + cookie = 0 + while True: + # next_dataset raises StopIteration when done + (name, cookie, props) = \ + zfs.ioctl.next_dataset(self.name, False, cookie) + ds = Dataset(name, props) + yield ds + for child in ds.descendents(): + yield child + + def userspace(self, prop): + """A generator function which iterates over a + userspace-type property. + + prop specifies which property ("userused@", + "userquota@", "groupused@", or "groupquota@"). + + returns 3-tuple of domain (string), rid (int), and space (int). 
+ """ + + d = zfs.ioctl.userspace_many(self.name, prop) + for ((domain, rid), space) in d.iteritems(): + yield (domain, rid, space) + + def userspace_upgrade(self): + """Initialize the accounting information for + userused@... and groupused@... properties.""" + return zfs.ioctl.userspace_upgrade(self.name) + + def set_fsacl(self, un, d): + """Add to the "zfs allow"-ed permissions on this Dataset. + + un is True if the specified permissions should be removed. + + d is a dict specifying which permissions to add/remove: + { "whostr" -> None # remove all perms for this entity + "whostr" -> { "perm" -> None} # add/remove these perms + } """ + return zfs.ioctl.set_fsacl(self.name, un, d) + + def get_fsacl(self): + """Get the "zfs allow"-ed permissions on the Dataset. + + Return a dict("whostr": { "perm" -> None }).""" + + return zfs.ioctl.get_fsacl(self.name) + + def get_holds(self): + """Get the user holds on this Dataset. + + Return a dict("tag": timestamp).""" + + return zfs.ioctl.get_holds(self.name) + +def snapshots_fromcmdline(dsnames, recursive): + for dsname in dsnames: + if not "@" in dsname: + raise zfs.util.ZFSError(errno.EINVAL, + _("cannot open %s") % dsname, + _("operation only applies to snapshots")) + try: + ds = Dataset(dsname) + yield ds + except zfs.util.ZFSError, e: + if not recursive or e.errno != errno.ENOENT: + raise + if recursive: + (base, snapname) = dsname.split('@') + parent = Dataset(base) + for child in parent.descendents(): + try: + yield Dataset(child.name + "@" + + snapname) + except zfs.util.ZFSError, e: + if e.errno != errno.ENOENT: + raise diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py b/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py new file mode 100644 index 0000000..9f380fd --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py @@ -0,0 +1,28 @@ +#! 
/usr/bin/python2.6 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. +# + +import zfs.userspace + +do_groupspace = zfs.userspace.do_userspace + diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/holds.py b/cddl/contrib/opensolaris/lib/pyzfs/common/holds.py new file mode 100644 index 0000000..800e28f --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/holds.py @@ -0,0 +1,75 @@ +#! /usr/bin/python2.6 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. +# + +"""This module implements the "zfs holds" subcommand. +The only public interface is the zfs.holds.do_holds() function.""" + +import optparse +import sys +import errno +import time +import zfs.util +import zfs.dataset +import zfs.table + +_ = zfs.util._ + +def do_holds(): + """Implements the "zfs holds" subcommand.""" + def usage(msg=None): + parser.print_help() + if msg: + print + parser.exit("zfs: error: " + msg) + else: + parser.exit() + + u = _("""holds [-r] <snapshot> ...""") + + parser = optparse.OptionParser(usage=u, prog="zfs") + + parser.add_option("-r", action="store_true", dest="recursive", + help=_("list holds recursively")) + + (options, args) = parser.parse_args(sys.argv[2:]) + + if len(args) < 1: + usage(_("missing snapshot argument")) + + fields = ("name", "tag", "timestamp") + rjustfields = () + printing = False + gotone = False + t = zfs.table.Table(fields, rjustfields) + for ds in zfs.dataset.snapshots_fromcmdline(args, options.recursive): + gotone = True + for tag, tm in ds.get_holds().iteritems(): + val = {"name": ds.name, "tag": tag, + "timestamp": time.ctime(tm)} + t.addline(ds.name, val) + printing = True + if printing: + t.printme() + elif not gotone: + raise zfs.util.ZFSError(errno.ENOENT, _("no matching datasets")) diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c b/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c new file mode 100644 index 0000000..d1f82a7 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c @@ -0,0 +1,544 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <Python.h> +#include <sys/zfs_ioctl.h> +#include <sys/fs/zfs.h> +#include <strings.h> +#include <unistd.h> +#include <libnvpair.h> +#include <libintl.h> +#include <libzfs.h> +#include <libzfs_impl.h> +#include "zfs_prop.h" + +static PyObject *ZFSError; +static int zfsdevfd; + +#ifdef __lint +#define dgettext(x, y) y +#endif + +#define _(s) dgettext(TEXT_DOMAIN, s) + +/*PRINTFLIKE1*/ +static void +seterr(char *fmt, ...) 
+{ + char errstr[1024]; + va_list v; + + va_start(v, fmt); + (void) vsnprintf(errstr, sizeof (errstr), fmt, v); + va_end(v); + + PyErr_SetObject(ZFSError, Py_BuildValue("is", errno, errstr)); +} + +static char cmdstr[HIS_MAX_RECORD_LEN]; + +static int +ioctl_with_cmdstr(int ioc, zfs_cmd_t *zc) +{ + int err; + + if (cmdstr[0]) + zc->zc_history = (uint64_t)(uintptr_t)cmdstr; + err = ioctl(zfsdevfd, ioc, zc); + cmdstr[0] = '\0'; + return (err); +} + +static PyObject * +nvl2py(nvlist_t *nvl) +{ + PyObject *pyo; + nvpair_t *nvp; + + pyo = PyDict_New(); + + for (nvp = nvlist_next_nvpair(nvl, NULL); nvp; + nvp = nvlist_next_nvpair(nvl, nvp)) { + PyObject *pyval; + char *sval; + uint64_t ival; + boolean_t bval; + nvlist_t *nval; + + switch (nvpair_type(nvp)) { + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &sval); + pyval = Py_BuildValue("s", sval); + break; + + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &ival); + pyval = Py_BuildValue("K", ival); + break; + + case DATA_TYPE_NVLIST: + (void) nvpair_value_nvlist(nvp, &nval); + pyval = nvl2py(nval); + break; + + case DATA_TYPE_BOOLEAN: + Py_INCREF(Py_None); + pyval = Py_None; + break; + + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(nvp, &bval); + pyval = Py_BuildValue("i", bval); + break; + + default: + PyErr_SetNone(PyExc_ValueError); + Py_DECREF(pyo); + return (NULL); + } + + PyDict_SetItemString(pyo, nvpair_name(nvp), pyval); + Py_DECREF(pyval); + } + + return (pyo); +} + +static nvlist_t * +dict2nvl(PyObject *d) +{ + nvlist_t *nvl; + int err; + PyObject *key, *value; + int pos = 0; + + if (!PyDict_Check(d)) { + PyErr_SetObject(PyExc_ValueError, d); + return (NULL); + } + + err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0); + assert(err == 0); + + while (PyDict_Next(d, &pos, &key, &value)) { + char *keystr = PyString_AsString(key); + if (keystr == NULL) { + PyErr_SetObject(PyExc_KeyError, key); + nvlist_free(nvl); + return (NULL); + } + + if (PyDict_Check(value)) { + nvlist_t 
*valnvl = dict2nvl(value); + err = nvlist_add_nvlist(nvl, keystr, valnvl); + nvlist_free(valnvl); + } else if (value == Py_None) { + err = nvlist_add_boolean(nvl, keystr); + } else if (PyString_Check(value)) { + char *valstr = PyString_AsString(value); + err = nvlist_add_string(nvl, keystr, valstr); + } else if (PyInt_Check(value)) { + uint64_t valint = PyInt_AsUnsignedLongLongMask(value); + err = nvlist_add_uint64(nvl, keystr, valint); + } else if (PyBool_Check(value)) { + boolean_t valbool = value == Py_True ? B_TRUE : B_FALSE; + err = nvlist_add_boolean_value(nvl, keystr, valbool); + } else { + PyErr_SetObject(PyExc_ValueError, value); + nvlist_free(nvl); + return (NULL); + } + assert(err == 0); + } + + return (nvl); +} + +static PyObject * +fakepropval(uint64_t value) +{ + PyObject *d = PyDict_New(); + PyDict_SetItemString(d, "value", Py_BuildValue("K", value)); + return (d); +} + +static void +add_ds_props(zfs_cmd_t *zc, PyObject *nvl) +{ + dmu_objset_stats_t *s = &zc->zc_objset_stats; + PyDict_SetItemString(nvl, "numclones", + fakepropval(s->dds_num_clones)); + PyDict_SetItemString(nvl, "issnap", + fakepropval(s->dds_is_snapshot)); + PyDict_SetItemString(nvl, "inconsistent", + fakepropval(s->dds_inconsistent)); +} + +/* On error, returns NULL but does not set python exception. 
*/ +static PyObject * +ioctl_with_dstnv(int ioc, zfs_cmd_t *zc) +{ + int nvsz = 2048; + void *nvbuf; + PyObject *pynv = NULL; + +again: + nvbuf = malloc(nvsz); + zc->zc_nvlist_dst_size = nvsz; + zc->zc_nvlist_dst = (uintptr_t)nvbuf; + + if (ioctl(zfsdevfd, ioc, zc) == 0) { + nvlist_t *nvl; + + errno = nvlist_unpack(nvbuf, zc->zc_nvlist_dst_size, &nvl, 0); + if (errno == 0) { + pynv = nvl2py(nvl); + nvlist_free(nvl); + } + } else if (errno == ENOMEM) { + free(nvbuf); + nvsz = zc->zc_nvlist_dst_size; + goto again; + } + free(nvbuf); + return (pynv); +} + +static PyObject * +py_next_dataset(PyObject *self, PyObject *args) +{ + int ioc; + uint64_t cookie; + zfs_cmd_t zc = { 0 }; + int snaps; + char *name; + PyObject *nvl; + PyObject *ret = NULL; + + if (!PyArg_ParseTuple(args, "siK", &name, &snaps, &cookie)) + return (NULL); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + zc.zc_cookie = cookie; + + if (snaps) + ioc = ZFS_IOC_SNAPSHOT_LIST_NEXT; + else + ioc = ZFS_IOC_DATASET_LIST_NEXT; + + nvl = ioctl_with_dstnv(ioc, &zc); + if (nvl) { + add_ds_props(&zc, nvl); + ret = Py_BuildValue("sKO", zc.zc_name, zc.zc_cookie, nvl); + Py_DECREF(nvl); + } else if (errno == ESRCH) { + PyErr_SetNone(PyExc_StopIteration); + } else { + if (snaps) + seterr(_("cannot get snapshots of %s"), name); + else + seterr(_("cannot get child datasets of %s"), name); + } + return (ret); +} + +static PyObject * +py_dataset_props(PyObject *self, PyObject *args) +{ + zfs_cmd_t zc = { 0 }; + int snaps; + char *name; + PyObject *nvl; + + if (!PyArg_ParseTuple(args, "s", &name)) + return (NULL); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + nvl = ioctl_with_dstnv(ZFS_IOC_OBJSET_STATS, &zc); + if (nvl) { + add_ds_props(&zc, nvl); + } else { + seterr(_("cannot access dataset %s"), name); + } + return (nvl); +} + +static PyObject * +py_get_fsacl(PyObject *self, PyObject *args) +{ + zfs_cmd_t zc = { 0 }; + char *name; + PyObject *nvl; + + if (!PyArg_ParseTuple(args, "s", &name)) 
+ return (NULL); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + nvl = ioctl_with_dstnv(ZFS_IOC_GET_FSACL, &zc); + if (nvl == NULL) + seterr(_("cannot get permissions on %s"), name); + + return (nvl); +} + +static PyObject * +py_set_fsacl(PyObject *self, PyObject *args) +{ + int un; + size_t nvsz; + zfs_cmd_t zc = { 0 }; + char *name, *nvbuf; + PyObject *dict, *file; + nvlist_t *nvl; + int err; + + if (!PyArg_ParseTuple(args, "siO!", &name, &un, + &PyDict_Type, &dict)) + return (NULL); + + nvl = dict2nvl(dict); + if (nvl == NULL) + return (NULL); + + err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); + assert(err == 0); + nvbuf = malloc(nvsz); + err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); + assert(err == 0); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + zc.zc_nvlist_src_size = nvsz; + zc.zc_nvlist_src = (uintptr_t)nvbuf; + zc.zc_perm_action = un; + + err = ioctl_with_cmdstr(ZFS_IOC_SET_FSACL, &zc); + free(nvbuf); + if (err) { + seterr(_("cannot set permissions on %s"), name); + return (NULL); + } + + Py_RETURN_NONE; +} + +static PyObject * +py_get_holds(PyObject *self, PyObject *args) +{ + zfs_cmd_t zc = { 0 }; + char *name; + PyObject *nvl; + + if (!PyArg_ParseTuple(args, "s", &name)) + return (NULL); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + nvl = ioctl_with_dstnv(ZFS_IOC_GET_HOLDS, &zc); + if (nvl == NULL) + seterr(_("cannot get holds for %s"), name); + + return (nvl); +} + +static PyObject * +py_userspace_many(PyObject *self, PyObject *args) +{ + zfs_cmd_t zc = { 0 }; + zfs_userquota_prop_t type; + char *name, *propname; + int bufsz = 1<<20; + void *buf; + PyObject *dict, *file; + int error; + + if (!PyArg_ParseTuple(args, "ss", &name, &propname)) + return (NULL); + + for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) + if (strcmp(propname, zfs_userquota_prop_prefixes[type]) == 0) + break; + if (type == ZFS_NUM_USERQUOTA_PROPS) { + PyErr_SetString(PyExc_KeyError, propname); + return (NULL); + 
} + + dict = PyDict_New(); + buf = malloc(bufsz); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + zc.zc_objset_type = type; + zc.zc_cookie = 0; + + while (1) { + zfs_useracct_t *zua = buf; + + zc.zc_nvlist_dst = (uintptr_t)buf; + zc.zc_nvlist_dst_size = bufsz; + + error = ioctl(zfsdevfd, ZFS_IOC_USERSPACE_MANY, &zc); + if (error || zc.zc_nvlist_dst_size == 0) + break; + + while (zc.zc_nvlist_dst_size > 0) { + PyObject *pykey, *pyval; + + pykey = Py_BuildValue("sI", + zua->zu_domain, zua->zu_rid); + pyval = Py_BuildValue("K", zua->zu_space); + PyDict_SetItem(dict, pykey, pyval); + Py_DECREF(pykey); + Py_DECREF(pyval); + + zua++; + zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); + } + } + + free(buf); + + if (error != 0) { + Py_DECREF(dict); + seterr(_("cannot get %s property on %s"), propname, name); + return (NULL); + } + + return (dict); +} + +static PyObject * +py_userspace_upgrade(PyObject *self, PyObject *args) +{ + zfs_cmd_t zc = { 0 }; + char *name; + int error; + + if (!PyArg_ParseTuple(args, "s", &name)) + return (NULL); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + error = ioctl(zfsdevfd, ZFS_IOC_USERSPACE_UPGRADE, &zc); + + if (error != 0) { + seterr(_("cannot initialize user accounting information on %s"), + name); + return (NULL); + } + + Py_RETURN_NONE; +} + +static PyObject * +py_set_cmdstr(PyObject *self, PyObject *args) +{ + char *str; + + if (!PyArg_ParseTuple(args, "s", &str)) + return (NULL); + + (void) strlcpy(cmdstr, str, sizeof (cmdstr)); + + Py_RETURN_NONE; +} + +static PyObject * +py_get_proptable(PyObject *self, PyObject *args) +{ + zprop_desc_t *t = zfs_prop_get_table(); + PyObject *d = PyDict_New(); + zfs_prop_t i; + + for (i = 0; i < ZFS_NUM_PROPS; i++) { + zprop_desc_t *p = &t[i]; + PyObject *tuple; + static const char *typetable[] = + {"number", "string", "index"}; + static const char *attrtable[] = + {"default", "readonly", "inherit", "onetime"}; + PyObject *indextable; + + if (p->pd_proptype == 
PROP_TYPE_INDEX) { + const zprop_index_t *it = p->pd_table; + indextable = PyDict_New(); + int j; + for (j = 0; it[j].pi_name; j++) { + PyDict_SetItemString(indextable, + it[j].pi_name, + Py_BuildValue("K", it[j].pi_value)); + } + } else { + Py_INCREF(Py_None); + indextable = Py_None; + } + + tuple = Py_BuildValue("sissKsissiiO", + p->pd_name, p->pd_propnum, typetable[p->pd_proptype], + p->pd_strdefault, p->pd_numdefault, + attrtable[p->pd_attr], p->pd_types, + p->pd_values, p->pd_colname, + p->pd_rightalign, p->pd_visible, indextable); + PyDict_SetItemString(d, p->pd_name, tuple); + Py_DECREF(tuple); + } + + return (d); +} + +static PyMethodDef zfsmethods[] = { + {"next_dataset", py_next_dataset, METH_VARARGS, + "Get next child dataset or snapshot."}, + {"get_fsacl", py_get_fsacl, METH_VARARGS, "Get allowed permissions."}, + {"set_fsacl", py_set_fsacl, METH_VARARGS, "Set allowed permissions."}, + {"userspace_many", py_userspace_many, METH_VARARGS, + "Get user space accounting."}, + {"userspace_upgrade", py_userspace_upgrade, METH_VARARGS, + "Upgrade fs to enable user space accounting."}, + {"set_cmdstr", py_set_cmdstr, METH_VARARGS, + "Set command string for history logging."}, + {"dataset_props", py_dataset_props, METH_VARARGS, + "Get dataset properties."}, + {"get_proptable", py_get_proptable, METH_NOARGS, + "Get property table."}, + {"get_holds", py_get_holds, METH_VARARGS, "Get user holds."}, + {NULL, NULL, 0, NULL} +}; + +void +initioctl(void) +{ + PyObject *zfs_ioctl = Py_InitModule("zfs.ioctl", zfsmethods); + PyObject *zfs_util = PyImport_ImportModule("zfs.util"); + PyObject *devfile; + + if (zfs_util == NULL) + return; + + ZFSError = PyObject_GetAttrString(zfs_util, "ZFSError"); + devfile = PyObject_GetAttrString(zfs_util, "dev"); + zfsdevfd = PyObject_AsFileDescriptor(devfile); + + zfs_prop_init(); +} diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/table.py b/cddl/contrib/opensolaris/lib/pyzfs/common/table.py new file mode 100644 index 
# 0000000..d2a45a1 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/table.py @@ -0,0 +1,70 @@
#! /usr/bin/python2.6
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

import zfs.util

class Table:
    """Collect rows of named fields and print them as a table.

    fields -- sequence of field names, in output column order
    rjustfields -- field names whose column is right-justified
    maxfieldlen -- dict from field name to the widest value seen so far
    lines -- list of (sortkey, [str(value), ...]) rows added via addline()
    """

    __slots__ = "fields", "rjustfields", "maxfieldlen", "lines"
    __repr__ = zfs.util.default_repr

    def __init__(self, fields, rjustfields=()):
        # XXX maybe have a defaults, too?
        self.fields = fields
        self.rjustfields = rjustfields
        self.maxfieldlen = dict.fromkeys(fields, 0)
        self.lines = list()

    def __updatemax(self, k, v):
        """Widen column k to v characters if v exceeds its current width."""
        # Default to 0 rather than None: max(None, v) only works because
        # Python 2 happens to order None below every integer.
        self.maxfieldlen[k] = max(self.maxfieldlen.get(k, 0), v)

    def __format(self, va, aligned):
        """Return one output line for the list of strings va.

        aligned -- if true, pad every column to its recorded width;
        otherwise emit each value followed by a tab.
        """
        if not aligned:
            return "".join([v + "\t" for v in va])

        cells = list()
        for (f, v) in zip(self.fields, va):
            if f in self.rjustfields:
                fmt = "%*s "
            else:
                fmt = "%-*s "
            cells.append(fmt % (self.maxfieldlen[f], v))
        return "".join(cells)

    def addline(self, sortkey, values):
        """Add a row.

        sortkey -- value the rows are ordered by in printme()
        values -- a dict from field name to value
        """

        va = list()
        for f in self.fields:
            v = str(values[f])
            va.append(v)
            self.__updatemax(f, len(v))
        self.lines.append((sortkey, va))

    def printme(self, headers=True):
        """Print the rows sorted by their sort key.

        headers -- if true, print an upper-cased header row first and
        align the columns; if false, print tab-delimited values only.
        """
        if headers:
            # The header row is formatted on its own instead of being
            # appended to self.lines, so that calling printme() again
            # does not print it twice and the sort never has to compare
            # a None key against the real sort keys.
            titles = [f.upper() for f in self.fields]
            for (f, title) in zip(self.fields, titles):
                self.__updatemax(f, len(title))
            print(self.__format(titles, headers))

        self.lines.sort()
        for (k, va) in self.lines:
            print(self.__format(va, headers))
# diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py b/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py new file mode 100644 index 0000000..cbdd4dd --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py @@ -0,0 +1,27 @@
#! /usr/bin/python2.6
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

import zfs.allow

do_unallow = zfs.allow.do_allow
# diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py b/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py new file mode 100644 index 0000000..33646bc --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py @@ -0,0 +1,246 @@
#! /usr/bin/python2.6
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

"""This module implements the "zfs userspace" and "zfs groupspace" subcommands.
The only public interface is the zfs.userspace.do_userspace() function."""

import optparse
import sys
import pwd
import grp
import errno
import solaris.misc
import zfs.util
import zfs.ioctl
import zfs.dataset
import zfs.table

_ = zfs.util._

# map from property name prefix -> (field name, isgroup)
props = {
    "userused@": ("used", False),
    "userquota@": ("quota", False),
    "groupused@": ("used", True),
    "groupquota@": ("quota", True),
}

def skiptype(options, prop):
    """Return True if this property (eg "userquota@") should be skipped.

    A property is skipped when its field was not requested with -o, or
    when none of the requested -t types matches its user/group kind."""
    (field, isgroup) = props[prop]
    if field not in options.fields:
        return True
    if isgroup and "posixgroup" not in options.types and \
        "smbgroup" not in options.types:
        return True
    if not isgroup and "posixuser" not in options.types and \
        "smbuser" not in options.types:
        return True
    return False

def new_entry(options, isgroup, domain, rid):
    """Return a dict("field": value) for this domain (string) + rid (int)

    Returns None if the entry's type (eg "posixuser") is not one of
    options.types."""

    if domain:
        idstr = "%s-%u" % (domain, rid)
    else:
        idstr = "%u" % rid

    # keyed by (isgroup, has-domain); True/False match the 1/0 keys
    (typename, mapfunc) = {
        (1, 1): ("SMB Group", lambda id: solaris.misc.sid_to_name(id, 0)),
        (1, 0): ("POSIX Group", lambda id: grp.getgrgid(int(id)).gr_name),
        (0, 1): ("SMB User", lambda id: solaris.misc.sid_to_name(id, 1)),
        (0, 0): ("POSIX User", lambda id: pwd.getpwuid(int(id)).pw_name)
    }[isgroup, bool(domain)]

    if typename.lower().replace(" ", "") not in options.types:
        return None

    v = dict()
    v["type"] = typename

    # python's getpwuid/getgrgid is confused by ephemeral uids
    if not options.noname and rid < 1<<31:
        try:
            v["name"] = mapfunc(idstr)
        except KeyError:
            # no such user/group; fall back to the numeric id below
            pass

    if "name" not in v:
        v["name"] = idstr
        if not domain:
            # it's just a number, so pad it with spaces so
            # that it will sort numerically
            v["name.sort"] = "%20d" % rid
    # fill in default values
    v["used"] = "0"
    v["used.sort"] = 0
    v["quota"] = "none"
    v["quota.sort"] = 0
    return v

def process_one_raw(acct, options, prop, elem):
    """Update the acct dict to incorporate the
    information from this elem from Dataset.userspace(prop).

    acct -- dict from (isgroup, domain, rid) to the entry dict
    elem -- a (domain, rid, value) tuple"""

    (domain, rid, value) = elem
    (field, isgroup) = props[prop]

    if options.translate and domain:
        try:
            rid = solaris.misc.sid_to_id("%s-%u" % (domain, rid),
                not isgroup)
            domain = None
        except KeyError:
            # the SID could not be translated; keep reporting it as a SID
            pass
    key = (isgroup, domain, rid)

    try:
        v = acct[key]
    except KeyError:
        v = new_entry(options, isgroup, domain, rid)
        if not v:
            return
        acct[key] = v

    # Add our value to an existing value, which may be present if
    # options.translate is set.
    value = v[field + ".sort"] = value + v[field + ".sort"]

    if options.parsable:
        v[field] = str(value)
    else:
        v[field] = zfs.util.nicenum(value)

def do_userspace():
    """Implements the "zfs userspace" and "zfs groupspace" subcommands."""

    def usage(msg=None):
        """Print the help text, then exit (with msg as the error, if given)."""
        parser.print_help()
        if msg:
            # blank line between the help text and the error message
            print("")
            parser.exit("zfs: error: " + msg)
        else:
            parser.exit()

    if sys.argv[1] == "userspace":
        defaulttypes = "posixuser,smbuser"
    else:
        defaulttypes = "posixgroup,smbgroup"

    fields = ("type", "name", "used", "quota")
    rjustfields = ("used", "quota")
    types = ("all", "posixuser", "smbuser", "posixgroup", "smbgroup")

    u = _("%s [-niHp] [-o field[,...]] [-sS field] ... \n") % sys.argv[1]
    u += _(" [-t type[,...]] <filesystem|snapshot>")
    parser = optparse.OptionParser(usage=u, prog="zfs")

    parser.add_option("-n", action="store_true", dest="noname",
        help=_("Print numeric ID instead of user/group name"))
    parser.add_option("-i", action="store_true", dest="translate",
        help=_("translate SID to posix (possibly ephemeral) ID"))
    parser.add_option("-H", action="store_true", dest="noheaders",
        help=_("no headers, tab delimited output"))
    parser.add_option("-p", action="store_true", dest="parsable",
        help=_("exact (parsable) numeric output"))
    parser.add_option("-o", dest="fields", metavar="field[,...]",
        default="type,name,used,quota",
        help=_("print only these fields (eg type,name,used,quota)"))
    parser.add_option("-s", dest="sortfields", metavar="field",
        type="choice", choices=fields, default=list(),
        action="callback", callback=zfs.util.append_with_opt,
        help=_("sort field"))
    parser.add_option("-S", dest="sortfields", metavar="field",
        type="choice", choices=fields, #-s sets the default
        action="callback", callback=zfs.util.append_with_opt,
        help=_("reverse sort field"))
    parser.add_option("-t", dest="types", metavar="type[,...]",
        default=defaulttypes,
        help=_("print only these types (eg posixuser,smbuser,posixgroup,smbgroup,all)"))

    (options, args) = parser.parse_args(sys.argv[2:])
    if len(args) != 1:
        usage(_("wrong number of arguments"))
    dsname = args[0]

    options.fields = options.fields.split(",")
    for f in options.fields:
        if f not in fields:
            usage(_("invalid field %s") % f)

    options.types = options.types.split(",")
    for t in options.types:
        if t not in types:
            usage(_("invalid type %s") % t)

    if not options.sortfields:
        options.sortfields = [("-s", "type"), ("-s", "name")]

    if "all" in options.types:
        options.types = types[1:]

    # A real 1-tuple: ("filesystem") without the trailing comma is just
    # the string "filesystem".
    # NOTE(review): Dataset is not visible from here -- presumably it
    # tests membership in types; confirm against zfs.dataset.
    ds = zfs.dataset.Dataset(dsname, types=("filesystem",))

    if ds.getprop("jailed") and solaris.misc.isglobalzone():
        options.noname = True

    if not ds.getprop("useraccounting"):
        print(_("Initializing accounting information on old filesystem, please wait..."))
        ds.userspace_upgrade()

    # gather and process accounting information
    # Due to -i, we need to keep a dict, so we can potentially add
    # together the posix ID and SID's usage.  Grr.
    acct = dict()
    for prop in props:
        if skiptype(options, prop):
            continue
        for elem in ds.userspace(prop):
            process_one_raw(acct, options, prop, elem)

    def cmpkey(val):
        """Return the list of sort keys for the entry dict val, one per
        -s/-S option, preferring the "<field>.sort" value if present."""
        l = list()
        for (opt, field) in options.sortfields:
            try:
                n = val[field + ".sort"]
            except KeyError:
                n = val[field]
            if opt == "-S":
                # reverse sorting
                try:
                    n = -n
                except TypeError:
                    # it's a string; decompose it
                    # into an array of integers,
                    # each one the negative of that
                    # character
                    n = [-ord(c) for c in n]
            l.append(n)
        return l

    t = zfs.table.Table(options.fields, rjustfields)
    for val in acct.values():
        t.addline(cmpkey(val), val)
    t.printme(not options.noheaders)
# diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/util.py b/cddl/contrib/opensolaris/lib/pyzfs/common/util.py new file mode 100644 index 0000000..a33c669 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/pyzfs/common/util.py @@ -0,0 +1,141 @@
#! /usr/bin/python2.6
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

"""This module provides utility functions for ZFS.
zfs.util.dev -- a file object of /dev/zfs """

import gettext
import errno
import os
import solaris.misc
# Note: this module (zfs.util) should not import zfs.ioctl, because that
# would introduce a circular dependency

errno.ECANCELED = 47
errno.ENOTSUP = 48

dev = open("/dev/zfs", "w")

try:
    _ = gettext.translation("SUNW_OST_OSLIB", "/usr/lib/locale",
        fallback=True).gettext
except Exception:
    # best effort: fall back to the libc gettext wrapper, but do not
    # swallow KeyboardInterrupt/SystemExit the way a bare except would
    _ = solaris.misc.gettext

def default_repr(self):
    """A simple __repr__ function.

    Lists every attribute named in __slots__ if the class has any,
    otherwise shows the instance __dict__."""
    if self.__slots__:
        text = "<" + self.__class__.__name__
        for v in self.__slots__:
            text += " %s: %r" % (v, getattr(self, v))
        return text + ">"
    else:
        return "<%s %s>" % \
            (self.__class__.__name__, repr(self.__dict__))

class ZFSError(StandardError):
    """This exception class represents a potentially user-visible
    ZFS error.  If uncaught, it will be printed and the process will
    exit with exit code 1.

    errno -- the error number (eg, from ioctl(2))."""

    __slots__ = "why", "task", "errno"
    __repr__ = default_repr

    def __init__(self, eno, task=None, why=None):
        """Create a ZFS exception.
        eno -- the error number (errno)
        task -- a string describing the task that failed
        why -- a string describing why it failed (defaults to
            strerror(eno))"""

        self.errno = eno
        self.task = task
        self.why = why

    def __str__(self):
        """Return "task: why", using strerror when why was not given."""
        s = ""
        if self.task:
            s += self.task + ": "
        if self.why:
            s += self.why
        else:
            s += self.strerror
        return s

    # ZFS-specific messages for the error numbers we know about
    __strs = {
        errno.EPERM: _("permission denied"),
        errno.ECANCELED:
            _("delegated administration is disabled on pool"),
        errno.EINTR: _("signal received"),
        errno.EIO: _("I/O error"),
        errno.ENOENT: _("dataset does not exist"),
        errno.ENOSPC: _("out of space"),
        errno.EEXIST: _("dataset already exists"),
        errno.EBUSY: _("dataset is busy"),
        errno.EROFS:
            _("snapshot permissions cannot be modified"),
        errno.ENAMETOOLONG: _("dataset name is too long"),
        errno.ENOTSUP: _("unsupported version"),
        errno.EAGAIN: _("pool I/O is currently suspended"),
    }

    # error numbers that share a message with one of the above
    __strs[errno.EACCES] = __strs[errno.EPERM]
    __strs[errno.ENXIO] = __strs[errno.EIO]
    __strs[errno.ENODEV] = __strs[errno.EIO]
    __strs[errno.EDQUOT] = __strs[errno.ENOSPC]

    @property
    def strerror(self):
        """The ZFS-specific message for self.errno, or os.strerror()."""
        return ZFSError.__strs.get(self.errno, os.strerror(self.errno))

def nicenum(num):
    """Return a nice string (eg "1.23M") for this integer."""
    units = " KMGTPE"
    index = 0
    n = num

    # Stop at the largest unit so that a huge value is reported in "E"
    # instead of indexing past the end of the unit table.
    while n >= 1024 and index < len(units) - 1:
        # floor division: n must stay an integer for the "%u" formats
        n //= 1024
        index += 1

    u = units[index]
    if index == 0:
        return "%u" % n
    elif n >= 100 or num & ((1 << (10 * index)) - 1) == 0:
        # it's an exact multiple of its unit (no bits set below
        # 2^(10*index)), or it wouldn't fit as floating point, so
        # print as an integer
        return "%u%c" % (n, u)
    else:
        # due to rounding, it's tricky to tell what precision to
        # use; try each precision and see which one fits
        for i in (2, 1, 0):
            s = "%.*f%c" % (i, float(num) / (1 << (10 * index)), u)
            if len(s) <= 5:
                return s

def append_with_opt(option, opt, value, parser):
    """A function for OptionParser which appends a tuple (opt, value)."""
    getattr(parser.values, option.dest).append((opt, value))