From f6f2e89faf1a03356b628cbff7d6fb0a47274b8e Mon Sep 17 00:00:00 2001
From: bz
Date: Thu, 13 Aug 2009 15:02:02 +0000
Subject: MFC r196185:

Rather than replicating the maths from the kernel, use the value the
kernel calculated directly as we already read it with struct vnet.
This will make kvm_vnet.c more resilient in case of possible kernel
changes.

Reviewed by:	rwatson
Approved by:	re (kib)
---
 lib/libkvm/kvm_vnet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib')

diff --git a/lib/libkvm/kvm_vnet.c b/lib/libkvm/kvm_vnet.c
index 1a02cad..d192c67 100644
--- a/lib/libkvm/kvm_vnet.c
+++ b/lib/libkvm/kvm_vnet.c
@@ -195,7 +195,7 @@ _kvm_vnet_selectpid(kvm_t *kd, pid_t pid)
 	kd->vnet_start = nl[NLIST_START_VNET].n_value;
 	kd->vnet_stop = nl[NLIST_STOP_VNET].n_value;
 	kd->vnet_current = (uintptr_t)prison.pr_vnet;
-	kd->vnet_base = (uintptr_t)vnet.vnet_data_mem - kd->vnet_start;
+	kd->vnet_base = vnet.vnet_data_base;
 	return (0);
 }
-- 
cgit v1.1

From e84e2e363118a59d3501ec21bf59d8e2776cb88a Mon Sep 17 00:00:00 2001
From: rwatson
Date: Thu, 13 Aug 2009 15:08:05 +0000
Subject: Merge r196123 from head to stable/8:

Update posix1e-related man pages, especially as relates to MAC, to more
accurately reflect the last ten years of work.

Approved by:	re (kib)
---
 lib/libc/posix1e/Makefile.inc |   4 +-
 lib/libc/posix1e/mac.3        | 153 ++++++++++++++++++++----------------------
 lib/libc/posix1e/mac_free.3   |   9 +--
 lib/libc/posix1e/mac_get.3    |   1 +
 lib/libc/posix1e/mac_set.3    |   1 +
 lib/libc/posix1e/mac_text.3   |   1 +
 lib/libc/posix1e/posix1e.3    | 116 +++++++++++++------------------
 7 files changed, 129 insertions(+), 156 deletions(-)
(limited to 'lib')

diff --git a/lib/libc/posix1e/Makefile.inc b/lib/libc/posix1e/Makefile.inc
index de0302b..c9e78bd 100644
--- a/lib/libc/posix1e/Makefile.inc
+++ b/lib/libc/posix1e/Makefile.inc
@@ -106,14 +106,16 @@ MLINKS+=acl_create_entry.3 acl_create_entry_np.3\
 	mac_get.3 mac_get_fd.3 \
 	mac_get.3 mac_get_file.3 \
 	mac_get.3 mac_get_link.3 \
+	mac_get.3 mac_get_peer.3 \
 	mac_get.3 mac_get_pid.3 \
 	mac_get.3 mac_get_proc.3 \
 	mac_prepare.3 mac_prepare_file_label.3 \
 	mac_prepare.3 mac_prepare_ifnet_label.3 \
 	mac_prepare.3 mac_prepare_process_label.3 \
-	mac_set.3 mac_set_link.3 \
+	mac_prepare.3 mac_prepare_type.3 \
 	mac_set.3 mac_set_fd.3 \
 	mac_set.3 mac_set_file.3 \
+	mac_set.3 mac_set_link.3 \
 	mac_set.3 mac_set_proc.3 \
 	mac_text.3 mac_from_text.3 \
 	mac_text.3 mac_to_text.3
diff --git a/lib/libc/posix1e/mac.3 b/lib/libc/posix1e/mac.3
index ac6affd..c570998 100644
--- a/lib/libc/posix1e/mac.3
+++ b/lib/libc/posix1e/mac.3
@@ -1,4 +1,5 @@
 .\" Copyright (c) 2001, 2003 Networks Associates Technology, Inc.
+.\" Copyright (c) 2009 Robert N. M. Watson
 .\" All rights reserved.
 .\"
 .\" This software was developed for the FreeBSD Project by Chris
@@ -30,7 +31,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 19, 2003
+.Dd August 7, 2009
 .Dt MAC 3
 .Os
 .Sh NAME
@@ -44,81 +45,82 @@
 In the kernel configuration file:
 .Cd "options MAC"
 .Sh DESCRIPTION
-.Fx
-permits administrators to define Mandatory Access Control labels
-defining levels for the privacy and integrity of data,
-overriding discretionary policies
-for those objects.
-Not all objects currently provide support for MAC labels,
-and MAC support must be explicitly enabled by the administrator.
-The library calls include routines to retrieve, duplicate,
-and set MAC labels associated with files and processes.
+Mandatory Access Control labels describe confidentiality, integrity, and
+other security attributes of operating system objects, overriding
+discretionary access control.
+Not all system objects support MAC labeling, and MAC policies must be
+explicitly enabled by the administrator.
+This API, based on POSIX.1e, includes routines to retrieve, manipulate, set,
+and convert to and from text the MAC labels on files and processes.
 .Pp
-POSIX.1e describes a set of MAC manipulation routines
-to manage the contents of MAC labels,
-as well as their relationships with
-files and processes;
-almost all of these support routines
-are implemented in
-.Fx .
+MAC labels consist of a set of (name, value) tuples, representing security
+attributes from MAC policies.
+For example, this label contains security labels defined by two policies,
+.Xr mac_biba 4
+and
+.Xr mac_mls 4 :
+.Bd -literal -offset indent
+biba/low,mls/low
+.Ed
+.Pp
+Further syntax and semantics of MAC labels may be found in
+.Xr maclabel 7 .
 .Pp
-Available functions, sorted by behavior, include:
+Applications operate on labels stored in
+.Vt mac_t ,
+but can convert between this internal format and a text format for the
+purposes of presentation to users or external storage.
+When querying a label on an object, a
+.Vt mac_t
+must first be prepared using the interfaces described in
+.Xr mac_prepare 3 ,
+allowing the application to declare which policies it wishes to interrogate.
+The application writer can also rely on default label names declared in
+.Xr mac.conf 5 .
+.Pp
+When finished with a
+.Vt mac_t ,
+the application must call
+.Xr mac_free 3
+to release its storage.
+.Pp
+The following functions are defined:
 .Bl -tag -width indent
-.It Fn mac_get_fd
-This function is described in
-.Xr mac_get 3 ,
-and may be used to retrieve the
-MAC label associated with
-a specific file descriptor.
-.It Fn mac_get_file
-This function is described in
+.It Fn mac_is_present
+This function, described in
+.Xr mac_is_present 3 ,
+allows applications to test whether MAC is configured, as well as whether
+specific policies are configured.
+.It Fn mac_get_fd , Fn mac_get_file , Fn mac_get_link , Fn mac_get_peer
+These functions, described in
 .Xr mac_get 3 ,
-and may be used to retrieve the
-MAC label associated with
-a named file.
-.It Fn mac_get_proc
-This function is described in
+retrieve the MAC labels associated with file descriptors, files, and socket
+peers.
+.It Fn mac_get_pid , Fn mac_get_proc
+These functions, described in
 .Xr mac_get 3 ,
-and may be used to retrieve the
-MAC label associated with
-the calling process.
-.It Fn mac_set_fd
-This function is described in
-.Xr mac_set 3 ,
-and may be used to set the
-MAC label associated with
-a specific file descriptor.
-.It Fn mac_set_file
-This function is described in
+retrieve the MAC labels associated with processes.
+.It Fn mac_set_fd , Fn mac_set_file , Fn mac_set_link
+These functions, described in
 .Xr mac_set 3 ,
-and may be used to set the
-MAC label associated with
-a named file.
+set the MAC labels associated with file descriptors and files.
 .It Fn mac_set_proc
-This function is described in
+This function, described in
 .Xr mac_set 3 ,
-and may be used to set the
-MAC label associated with
-the calling process.
+sets the MAC label associated with the current process.
 .It Fn mac_free
-This function is described in
+This function, described in
 .Xr mac_free 3 ,
-and may be used to free
-userland working MAC label storage.
+frees working MAC label storage.
 .It Fn mac_from_text
-This function is described in
+This function, described in
 .Xr mac_text 3 ,
-and may be used to convert
-a text-form MAC label
-into a working
+converts a text-form MAC label into working
 MAC label storage,
 .Vt mac_t .
-.It Fn mac_prepare
-.It Fn mac_prepare_file_label
-.It Fn mac_prepare_ifnet_label
-.It Fn mac_prepare_process_label
-These functions are described in
+.It Fn mac_prepare , Fn mac_prepare_file_label , Fn mac_prepare_ifnet_label , Fn mac_prepare_process_label , Fn mac_prepare_type
+These functions, described in
 .Xr mac_prepare 3 ,
-and may be used to preallocate storage for MAC label retrieval.
+allocate working storage for MAC label operations.
 .Xr mac_prepare 3
 prepares a label based on caller-specified label names; the other calls
 rely on the default configuration specified in
@@ -130,15 +132,6 @@ and may be used to convert a
 .Vt mac_t
 into a text-form MAC label.
 .El
-The behavior of some of these calls is influenced by the configuration
-settings found in
-.Xr mac.conf 5 ,
-the MAC library run-time configuration file.
-.Sh IMPLEMENTATION NOTES
-.Fx Ns 's
-support for POSIX.1e interfaces and features
-is
-.Ud .
 .Sh FILES
 .Bl -tag -width ".Pa /etc/mac.conf" -compact
 .It Pa /etc/mac.conf
 MAC library configuration file, defining default label names to list
 when interacting with MAC label on system objects, but without
 policy-specific knowledge.
 .Sh SEE ALSO
 .Xr mac_free 3 ,
 .Xr mac_get 3 ,
+.Xr mac_is_present 3 ,
 .Xr mac_prepare 3 ,
 .Xr mac_set 3 ,
 .Xr mac_text 3 ,
+.Xr posix1e 3 ,
 .Xr mac 4 ,
 .Xr mac.conf 5 ,
 .Xr mac 9
 .Sh STANDARDS
-These APIs are loosely based on the APIs described in POSIX.1e.
-POSIX.1e is described in IEEE POSIX.1e draft 17.
-Discussion of the draft
-continues on the cross-platform POSIX.1e implementation mailing list.
-To join this list, see the
-.Fx
-POSIX.1e implementation page
-for more information.
-However, the resemblance of these APIs to the POSIX APIs is only loose,
-as the POSIX APIs were unable to express many notions required for
-flexible and extensible access control.
+These APIs are loosely based on the APIs described in POSIX.1e, as described
+in IEEE POSIX.1e draft 17.
+However, the resemblance of these APIs to the POSIX APIs is loose, as the
+POSIX APIs were unable to express some notions required for flexible and
+extensible access control.
 .Sh HISTORY
 Support for Mandatory Access Control was introduced in
 .Fx 5.0
diff --git a/lib/libc/posix1e/mac_free.3 b/lib/libc/posix1e/mac_free.3
index 33ef9ec..78118a1 100644
--- a/lib/libc/posix1e/mac_free.3
+++ b/lib/libc/posix1e/mac_free.3
@@ -64,6 +64,7 @@ function.
 .Xr mac_prepare 3 ,
 .Xr mac_set 3 ,
 .Xr mac_text 3 ,
+.Xr posix1e 3 ,
 .Xr mac 4 ,
 .Xr mac 9
 .Sh STANDARDS
@@ -91,8 +92,8 @@ is a complex structure in the
 .Tn TrustedBSD
 implementation,
 .Fn mac_free
-is specific to that type, and must not be used to free the character
-strings returned from
+is specific to
+.Vt mac_t ,
+and must not be used to free the character strings returned from
 .Fn mac_to_text .
-Doing so may result in undefined behavior,
-including application failure.
+Doing so may result in undefined behavior.
diff --git a/lib/libc/posix1e/mac_get.3 b/lib/libc/posix1e/mac_get.3
index cd498d0..35fa72b 100644
--- a/lib/libc/posix1e/mac_get.3
+++ b/lib/libc/posix1e/mac_get.3
@@ -133,6 +133,7 @@ is not a directory.
 .Xr mac_prepare 3 ,
 .Xr mac_set 3 ,
 .Xr mac_text 3 ,
+.Xr posix1e 3 ,
 .Xr mac 4 ,
 .Xr mac 9
 .Sh STANDARDS
diff --git a/lib/libc/posix1e/mac_set.3 b/lib/libc/posix1e/mac_set.3
index 84a1800..0b245a0 100644
--- a/lib/libc/posix1e/mac_set.3
+++ b/lib/libc/posix1e/mac_set.3
@@ -138,6 +138,7 @@ is not a directory.
 .Xr mac_is_present 3 ,
 .Xr mac_prepare 3 ,
 .Xr mac_text 3 ,
+.Xr posix1e 3 ,
 .Xr mac 4 ,
 .Xr mac 9
 .Sh HISTORY
diff --git a/lib/libc/posix1e/mac_text.3 b/lib/libc/posix1e/mac_text.3
index 66c1e21..dde6ccf 100644
--- a/lib/libc/posix1e/mac_text.3
+++ b/lib/libc/posix1e/mac_text.3
@@ -98,6 +98,7 @@ to allocate internal storage.
 .Xr mac_is_present 3 ,
 .Xr mac_prepare 3 ,
 .Xr mac_set 3 ,
+.Xr posix1e 3 ,
 .Xr mac 4 ,
 .Xr maclabel 7
 .Sh STANDARDS
diff --git a/lib/libc/posix1e/posix1e.3 b/lib/libc/posix1e/posix1e.3
index 2065523..84ce2ec 100644
--- a/lib/libc/posix1e/posix1e.3
+++ b/lib/libc/posix1e/posix1e.3
@@ -1,5 +1,5 @@
 .\"-
-.\" Copyright (c) 2000 Robert N. M. Watson
+.\" Copyright (c) 2000, 2009 Robert N. M. Watson
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 20, 2009
+.Dd August 7, 2009
 .Dt POSIX1E 3
 .Os
 .Sh NAME
@@ -36,99 +36,77 @@
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/acl.h
-.\" .In sys/capability.h
 .In sys/mac.h
 .Sh DESCRIPTION
-The IEEE POSIX.1e specification never left draft form, but the interfaces
-it describes are now widely used despite inherent limitations.
-Currently, only a few of the interfaces and features are implemented in
-.Fx ,
-although efforts are underway to complete the integration at this time.
+POSIX.1e describes five security extensions to the POSIX.1 API: Access
+Control Lists (ACLs), Auditing, Capabilities, Mandatory Access Control, and
+Information Flow Labels.
+While the IEEE POSIX.1e D17 specification has not been standardized, several
+of its interfaces are widely used.
 .Pp
-POSIX.1e describes five security extensions to the base POSIX.1 API:
-Access Control Lists (ACLs), Auditing, Capabilities, Mandatory Access
-Control, and Information Flow Labels.
 .Fx
-supports POSIX.1e ACL interfaces, as well as POSIX.1e-like MAC
-interfaces.
-The TrustedBSD Project has produced but not integrated an implementation
-of POSIX.1e Capabilities.
-.Pp
-POSIX.1e defines both syntax and semantics for these features, but fairly
-substantial changes are required to implement these features in the
-operating system.
-.Pp
-As shipped,
-.Fx 4.0
-provides API and VFS support for ACLs, but not an implementation on any
-native file system.
-.Fx 5.0
-includes support for ACLs as part of UFS1 and UFS2, as well as necessary
-VFS support for additional file systems to export ACLs as appropriate.
-Available API calls relating to ACLs are described in detail in
-.Xr acl 3 .
-.Pp
-As shipped,
-.Fx 5.0
-includes support for Mandatory Access Control as well as POSIX.1e-like
-APIs for label management.
-More information on API calls relating to MAC is available in
-.Xr mac 3 .
+implements the POSIX.1e interface for access control lists, described in
+.Xr acl 3 ,
+and supports ACLs on the
+.Xr ffs 7
+file system; ACLs must be administratively enabled using
+.Xr tunefs 8 .
 .Pp
-Additional patches supporting POSIX.1e features are provided by the
-TrustedBSD project:
+.Fx
+implements a POSIX.1e-like mandatory access control interface, described in
+.Xr mac 3 ,
+although with a number of extensions and important semantic differences.
 .Pp
-http://www.TrustedBSD.org/
-.Sh IMPLEMENTATION NOTES
-.Fx Ns 's
-support for POSIX.1e interfaces and features is still under
-development at this time, and many of these features are considered new
-or experimental.
+.Fx
+does not implement the POSIX.1e audit, privilege (capability), or information
+flow label APIs.
+However,
+.Fx
+does implement the
+.Xr libbsm
+audit API.
 .Sh ENVIRONMENT
-POSIX.1e assigns security labels to all objects, extending the security
+POSIX.1e assigns security attributes to all objects, extending the security
 functionality described in POSIX.1.
-These additional labels provide
-fine-grained discretionary access control, fine-grained capabilities,
-and labels necessary for mandatory access control.
-POSIX.2c describes
-a set of userland utilities for manipulating these labels.
+These additional attributes store fine-grained discretionary access control
+information and mandatory access control labels; for files, they are stored
+in extended attributes, described in
+.Xr extattr 3 .
 .Pp
-Many of these services are supported by extended attributes, documented
-in
-.Xr extattr 2
+POSIX.2c describes
+a set of userland utilities for manipulating these attributes, including
+.Xr getfacl 1
+and
+.Xr setfacl 1
+for access control lists, and
+.Xr getfmac 8
 and
-.Xr extattr 9 .
-While these APIs are not documented in POSIX.1e, they are similar in
-structure.
+.Xr setfmac 8
+for mandatory access control labels.
 .Sh SEE ALSO
+.Xr getfacl 1 ,
+.Xr setfacl 1 ,
 .Xr extattr 2 ,
 .Xr acl 3 ,
+.Xr extattr 3 ,
 .Xr libbsm 3 ,
 .Xr mac 3 ,
+.Xr ffs 7 ,
+.Xr getfmac 8 ,
+.Xr setfmac 8 ,
+.Xr tunefs 8 ,
 .Xr acl 9 ,
 .Xr extattr 9 ,
 .Xr mac 9
 .Sh STANDARDS
 POSIX.1e is described in IEEE POSIX.1e draft 17.
-Discussion of the draft continues
-on the cross-platform POSIX.1e implementation
-mailing list.
-To join this list, see the
-.Fx
-POSIX.1e implementation
-page for more information.
 .Sh HISTORY
 POSIX.1e support was introduced in
 .Fx 4.0 ;
-most of the features are available as of
+most features were available as of
 .Fx 5.0 .
-Development continues.
 .Sh AUTHORS
 .An Robert N M Watson
 .An Chris D. Faulhaber
 .An Thomas Moestl
 .An Ilmar S Habibulin
-.Sh BUGS
-Many of these features are considered new or experimental in
-.Fx 5.0
-and should be deployed with appropriate caution.
-- 
cgit v1.1

From 4bc31c3c24ad7325984412b3e7b0358a5ed595fa Mon Sep 17 00:00:00 2001
From: ed
Date: Fri, 28 Aug 2009 10:25:26 +0000
Subject: MFC r196508:

Our implementation of grantpt(3) could be valid in the future.

When I wrote the pseudo-terminal driver for the MPSAFE TTY code, Robert
Watson and I agreed the best way to implement this, would be to let
posix_openpt() create a pseudo-terminal with proper permissions in place
and let grantpt() and unlockpt() be no-ops. This isn't valid behaviour
when looking at the spec.

Because I thought it was an elegant solution, I filed a bug report at
the Austin Group about this. In their last teleconference, they agreed
on this subject. This means that future revisions of POSIX may allow
grantpt() and unlockpt() to be no-ops if an open() on /dev/ptmx (if the
implementation has such a device) and posix_openpt() already do the
right thing.

I'd rather put this in the manpage, because simply mentioning we don't
comply with any standard makes it look worse than it is. Right now we
don't, but at least we took care of it.
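For reference, the allocation sequence this affects looks roughly as
follows (an illustrative sketch, not part of the change); portable code
still calls grantpt() and unlockpt() even on systems where they are
effectively no-ops:

	#include <err.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int
	main(void)
	{
		int mfd;
		char *slave;

		/* On FreeBSD this already creates the master device
		 * with the proper ownership and permissions in place. */
		mfd = posix_openpt(O_RDWR | O_NOCTTY);
		if (mfd == -1)
			err(1, "posix_openpt");
		/* Effectively no-ops here, but required for portability. */
		if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
			err(1, "grantpt/unlockpt");
		if ((slave = ptsname(mfd)) == NULL)
			err(1, "ptsname");
		printf("slave device: %s\n", slave);
		close(mfd);
		return (0);
	}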
Approved by:	re (kib)
---
 lib/libc/stdlib/ptsname.3 | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)
(limited to 'lib')

diff --git a/lib/libc/stdlib/ptsname.3 b/lib/libc/stdlib/ptsname.3
index bfa3647..f674a54 100644
--- a/lib/libc/stdlib/ptsname.3
+++ b/lib/libc/stdlib/ptsname.3
@@ -134,14 +134,22 @@ The slave pseudo-terminal device could not be accessed.
 The
 .Fn ptsname
 function conforms to
-.St -p1003.1-2001 .
+.St -p1003.1-2008 .
 .Pp
 This implementation of
 .Fn grantpt
 and
 .Fn unlockpt
-does not comply with any standard, because these functions assume the
-pseudo-terminal has the correct attributes upon creation.
+does not conform to
+.St -p1003.1-2008 ,
+because it depends on
+.Xr posix_openpt 2
+to create the pseudo-terminal device with proper permissions in place.
+It only validates whether
+.Fa fildes
+is a valid pseudo-terminal master device.
+Future revisions of the specification will likely allow this behaviour,
+as stated by the Austin Group.
 .Sh HISTORY
 The
 .Fn grantpt ,
@@ -150,15 +158,3 @@ and
 .Fn unlockpt
 functions appeared in
 .Fx 5.0 .
-.Sh NOTES
-The purpose of the
-.Fn grantpt
-and
-.Fn unlockpt
-functions has no meaning in
-.Fx ,
-because pseudo-terminals obtained by
-.Xr posix_openpt 2
-are created on demand.
-Because these devices are created with proper permissions in place, they
-are guaranteed to be unused by unprivileged processes.
-- 
cgit v1.1

From bc1de82641d5a9be69feee477bff716e1f245aa8 Mon Sep 17 00:00:00 2001
From: delphij
Date: Fri, 28 Aug 2009 19:48:06 +0000
Subject: MFC r196525:

Consider flag == 0 the same as flag == R_NEXT. This change will restore
a historical behavior that was changed by revision 190491, a change
that has been seen to break exim.

Approved by:	re (kib)
---
 lib/libc/db/hash/hash.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'lib')

diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c
index b58b36f..83d2657 100644
--- a/lib/libc/db/hash/hash.c
+++ b/lib/libc/db/hash/hash.c
@@ -711,7 +711,7 @@ hash_seq(const DB *dbp, DBT *key, DBT *data, u_int32_t flag)
 		hashp->cndx = 1;
 		hashp->cpage = NULL;
 	}
-	next_bucket:
+next_bucket:
 	for (bp = NULL; !bp || !bp[0]; ) {
 		if (!(bufp = hashp->cpage)) {
 			for (bucket = hashp->cbucket;
@@ -732,7 +732,7 @@ hash_seq(const DB *dbp, DBT *key, DBT *data, u_int32_t flag)
 			}
 		} else {
 			bp = (u_int16_t *)hashp->cpage->page;
-			if (flag == R_NEXT) {
+			if (flag == R_NEXT || flag == 0) {
 				hashp->cndx += 2;
 				if (hashp->cndx > bp[0]) {
 					hashp->cpage = NULL;
-- 
cgit v1.1

From d40d30c7397aca89b9085b67c126fce64ab0c0bf Mon Sep 17 00:00:00 2001
From: trasz
Date: Sat, 5 Sep 2009 15:01:56 +0000
Subject: MFC r196740:

Fix regression introduced with NFSv4 ACL support - make acl_to_text(3)
and acl_calc_mask(3) return error instead of crashing when acl passed
to them is NULL.

Submitted by:	markus
Reviewed by:	rwatson
Approved by:	re (kib)
---
 lib/libc/posix1e/acl_calc_mask.c | 13 +++++++------
 lib/libc/posix1e/acl_to_text.c   | 10 +++++-----
 2 files changed, 12 insertions(+), 11 deletions(-)
(limited to 'lib')

diff --git a/lib/libc/posix1e/acl_calc_mask.c b/lib/libc/posix1e/acl_calc_mask.c
index 95fd260..a2d1527 100644
--- a/lib/libc/posix1e/acl_calc_mask.c
+++ b/lib/libc/posix1e/acl_calc_mask.c
@@ -50,12 +50,6 @@ acl_calc_mask(acl_t *acl_p)
 	acl_t acl_new;
 	int i, mask_mode, mask_num;
 
-	if (!_acl_brand_may_be(*acl_p, ACL_BRAND_POSIX)) {
-		errno = EINVAL;
-		return (-1);
-	}
-	_acl_brand_as(*acl_p, ACL_BRAND_POSIX);
-
 	/*
 	 * (23.4.2.4) requires acl_p to point to a pointer to a valid ACL.
* Since one of the primary reasons to use this function would be @@ -67,6 +61,13 @@ acl_calc_mask(acl_t *acl_p) errno = EINVAL; return (-1); } + + if (!_acl_brand_may_be(*acl_p, ACL_BRAND_POSIX)) { + errno = EINVAL; + return (-1); + } + _acl_brand_as(*acl_p, ACL_BRAND_POSIX); + acl_int = &(*acl_p)->ats_acl; if ((acl_int->acl_cnt < 3) || (acl_int->acl_cnt > ACL_MAX_ENTRIES)) { errno = EINVAL; diff --git a/lib/libc/posix1e/acl_to_text.c b/lib/libc/posix1e/acl_to_text.c index 3485b1e..79a950a 100644 --- a/lib/libc/posix1e/acl_to_text.c +++ b/lib/libc/posix1e/acl_to_text.c @@ -70,11 +70,6 @@ _posix1e_acl_to_text(acl_t acl, ssize_t *len_p, int flags) if (buf == NULL) return(NULL); - if (acl == NULL) { - errno = EINVAL; - return(NULL); - } - acl_int = &acl->ats_acl; mask_perm = ACL_PERM_BITS; /* effective is regular if no mask */ @@ -243,6 +238,11 @@ char * acl_to_text_np(acl_t acl, ssize_t *len_p, int flags) { + if (acl == NULL) { + errno = EINVAL; + return(NULL); + } + switch (_acl_brand(acl)) { case ACL_BRAND_POSIX: return (_posix1e_acl_to_text(acl, len_p, flags)); -- cgit v1.1 From 90a09c13dfdde3ffcf990b535aa7fe8eeb253dbf Mon Sep 17 00:00:00 2001 From: kensmith Date: Thu, 10 Sep 2009 14:04:00 +0000 Subject: Remove extra debugging support that is turned on for head but turned off for stable branches: - shift to MALLOC_PRODUCTION - turn off automatic crash dumps - Remove kernel debuggers, INVARIANTS*[1], WITNESS* from GENERIC kernel config files[2] [1] INVARIANTS* left on for ia64 by request marcel [2] sun4v was left as-is Reviewed by: marcel, kib Approved by: re (implicit) --- lib/libc/stdlib/malloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index 270d641..e60746a 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -114,7 +114,7 @@ * defaults the A and J runtime options to off. These settings are appropriate * for production systems. */ -/* #define MALLOC_PRODUCTION */ +#define MALLOC_PRODUCTION #ifndef MALLOC_PRODUCTION /* -- cgit v1.1 From 59162ba6c318eb5246931eab97b9653eac040b7d Mon Sep 17 00:00:00 2001 From: kib Date: Sat, 12 Sep 2009 18:16:46 +0000 Subject: MFC r196861: Handle zero size for posix_memalign. Return NULL or unique address according to the 'V' option. Approved by: re (kensmith) --- lib/libc/stdlib/malloc.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index e60746a..8b13d12 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -5320,6 +5320,15 @@ posix_memalign(void **memptr, size_t alignment, size_t size) goto RETURN; } + if (size == 0) { + if (opt_sysv == false) + size = 1; + else { + result = NULL; + ret = 0; + goto RETURN; + } + } result = ipalloc(alignment, size); } -- cgit v1.1 From 51bfbbd15e84d8be305cd3cd3f7507c7399a66a7 Mon Sep 17 00:00:00 2001 From: attilio Date: Tue, 15 Sep 2009 19:24:18 +0000 Subject: MFC r197224: Use explicit int values for the device states in order to allow, if necessary, in the future, adds of new states without breaking ABI between revisions. Please note that this is a special condition as we want this fix in before RC1 as we assume it is critical and so it has been handled as an instant-merge. 
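As a rough illustration of the idiom (the DIS_SUSPENDED state below is
hypothetical, shown only to make the point): leaving gaps between the
explicit values lets a future state be inserted without renumbering the
values already compiled into existing consumers.

	typedef enum devinfo_state {
		DIS_NOTPRESENT = 10,	/* not probed or probe failed */
		DIS_ALIVE = 20,		/* probe succeeded */
	/*	DIS_SUSPENDED = 25,	   hypothetical later addition;
					   existing values are unchanged */
		DIS_ATTACHED = 30,	/* attach method called */
		DIS_BUSY = 40		/* device is open */
	} devinfo_state_t;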
Approved by: re (kib) --- lib/libdevinfo/devinfo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libdevinfo/devinfo.h b/lib/libdevinfo/devinfo.h index 91930a4..3e580f2 100644 --- a/lib/libdevinfo/devinfo.h +++ b/lib/libdevinfo/devinfo.h @@ -41,10 +41,10 @@ typedef __uintptr_t devinfo_handle_t; */ /* XXX not sure if I want a copy here, or expose sys/bus.h */ typedef enum devinfo_state { - DIS_NOTPRESENT, /* not probed or probe failed */ - DIS_ALIVE, /* probe succeeded */ - DIS_ATTACHED, /* attach method called */ - DIS_BUSY /* device is open */ + DIS_NOTPRESENT = 10, /* not probed or probe failed */ + DIS_ALIVE = 20, /* probe succeeded */ + DIS_ATTACHED = 30, /* attach method called */ + DIS_BUSY = 40 /* device is open */ } devinfo_state_t; struct devinfo_dev { -- cgit v1.1 From 3134bec5442476cc5b10fc86356716e5e8d24853 Mon Sep 17 00:00:00 2001 From: kientzle Date: Thu, 17 Sep 2009 06:31:59 +0000 Subject: MFC r196962: Fix /usr/bin/unzip: A bug deep in libarchive's read-ahead logic (incorrect handling of zero-length reads before the copy buffer is allocated) is masked by the iso9660 taster. Tar and cpio both enable that taster so were protected from the bug; unzip is susceptible. This both fixes the bug and updates the test harness to exercise this case. Submitted by: Ed Schouten diagnosed the bug and drafted a patch Approved by: re (kib) --- lib/libarchive/archive_read.c | 7 +++++-- lib/libarchive/test/test_compat_zip.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libarchive/archive_read.c b/lib/libarchive/archive_read.c index 9274d66..fd4f888 100644 --- a/lib/libarchive/archive_read.c +++ b/lib/libarchive/archive_read.c @@ -928,9 +928,12 @@ __archive_read_filter_ahead(struct archive_read_filter *filter, for (;;) { /* - * If we can satisfy from the copy buffer, we're done. + * If we can satisfy from the copy buffer (and the + * copy buffer isn't empty), we're done. In particular, + * note that min == 0 is a perfectly well-defined + * request. */ - if (filter->avail >= min) { + if (filter->avail >= min && filter->avail > 0) { if (avail != NULL) *avail = filter->avail; return (filter->next); diff --git a/lib/libarchive/test/test_compat_zip.c b/lib/libarchive/test/test_compat_zip.c index f60bc4f..fefc6fe 100644 --- a/lib/libarchive/test/test_compat_zip.c +++ b/lib/libarchive/test/test_compat_zip.c @@ -36,7 +36,7 @@ test_compat_zip_1(void) assert((a = archive_read_new()) != NULL); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); extract_reference_file(name); assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 10240)); -- cgit v1.1 From 5333c9baf74c842291a18ce7d0a5216c2353e411 Mon Sep 17 00:00:00 2001 From: attilio Date: Thu, 24 Sep 2009 08:35:17 +0000 Subject: MFC r197445: Let fall down in the hard path (thus handling shared waiters wakeup correctly) for the shared waiters also in the rwlock held in shared mode as well, fixing possible deadlocks. Please note that this is a special condition as we want this fix in before RC2 as we assume it is critical and so it has been handled as an instant-merge. For the STABLE_7 branch, 1 week before the MFC is assumed. 
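In outline, the rule the fix enforces is that the last reader must
divert to the kernel whenever any waiters are queued, not only write
waiters. A self-contained toy model of that decision (made-up flag
values, not the libthr source):

	#include <errno.h>
	#include <stdatomic.h>
	#include <stdint.h>

	#define WRITE_WAITERS	0x40000000U
	#define READ_WAITERS	0x80000000U
	#define READER_COUNT(s)	((s) & 0x3fffffffU)

	/* Drop one reader; the last reader must take the slow path
	 * when either kind of waiter is present, or blocked readers
	 * are never woken. */
	static int
	rdunlock(_Atomic uint32_t *statep)
	{
		uint32_t state;

		for (;;) {
			state = atomic_load(statep);
			if (READER_COUNT(state) == 0)
				return (EPERM);
			if ((state & (WRITE_WAITERS | READ_WAITERS)) &&
			    READER_COUNT(state) == 1)
				return (-1);	/* caller enters slow path */
			if (atomic_compare_exchange_weak(statep, &state,
			    state - 1))
				return (0);	/* fast path */
		}
	}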
Approved by:	re (kib)
---
 lib/libthr/thread/thr_umtx.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'lib')

diff --git a/lib/libthr/thread/thr_umtx.h b/lib/libthr/thread/thr_umtx.h
index 41b5f96..a6e462e 100644
--- a/lib/libthr/thread/thr_umtx.h
+++ b/lib/libthr/thread/thr_umtx.h
@@ -171,8 +171,11 @@ _thr_rwlock_unlock(struct urwlock *rwlock)
 	for (;;) {
 		if (__predict_false(URWLOCK_READER_COUNT(state) == 0))
 			return (EPERM);
-		if (!((state & URWLOCK_WRITE_WAITERS) && URWLOCK_READER_COUNT(state) == 1)) {
-			if (atomic_cmpset_rel_32(&rwlock->rw_state, state, state-1))
+		if (!((state & (URWLOCK_WRITE_WAITERS |
+		    URWLOCK_READ_WAITERS)) &&
+		    URWLOCK_READER_COUNT(state) == 1)) {
+			if (atomic_cmpset_rel_32(&rwlock->rw_state,
+			    state, state-1))
 				return (0);
 			state = rwlock->rw_state;
 		} else {
-- 
cgit v1.1

From 0ba688a4932f6bcc507979e42c93fb154118c991 Mon Sep 17 00:00:00 2001
From: nyan
Date: Thu, 24 Sep 2009 15:34:18 +0000
Subject: MFC: r197322 and r197374

Revision: 197322
Log:
  Correct BIOS header sanitizing on pc98.

Revision: 197374
Log:
  Disable a check on a disk size because it's too strict.
  This change is to avoid using incorrect geometry.
  It seems that this is the same problem in g_part_bsd_read()@g_part_bsd.c.

  Reviewed by:	rink

Approved by:	re (kib)
---
 lib/libdisk/change.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
(limited to 'lib')

diff --git a/lib/libdisk/change.c b/lib/libdisk/change.c
index 297ac11..5f0c9c6 100644
--- a/lib/libdisk/change.c
+++ b/lib/libdisk/change.c
@@ -36,17 +36,22 @@ Sanitize_Bios_Geom(struct disk *disk)
 
 	if (disk->bios_cyl >= 65536)
 		sane = 0;
-	if (disk->bios_hd > 256)
-		sane = 0;
 #ifdef PC98
+	if (disk->bios_hd >= 256)
+		sane = 0;
 	if (disk->bios_sect >= 256)
+		sane = 0;
 #else
+	if (disk->bios_hd > 256)
+		sane = 0;
 	if (disk->bios_sect > 63)
-#endif
 		sane = 0;
+#endif
+#if 0	/* Disable a check on a disk size. It's too strict. */
 	if (disk->bios_cyl * disk->bios_hd * disk->bios_sect !=
 	    disk->chunks->size)
 		sane = 0;
+#endif
 	if (sane)
 		return;
-- 
cgit v1.1

From d40bf286b68e59da94f2deb986fc8ab232f45a20 Mon Sep 17 00:00:00 2001
From: brooks
Date: Thu, 24 Sep 2009 21:35:13 +0000
Subject: MFC r196990:

cr_groups is no longer embedded in struct ucred and is instead stored
in a separate array. As such we need to use kvm_read rather than bcopy
to populate the ki_groups field. This fixes a crash when running ps -ax
on a coredump.

Reported by:	brucec
Tested by:	brucec
MFC after:	3 days
Approved by:	re@ (kib)
---
 lib/libkvm/kvm_proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib')

diff --git a/lib/libkvm/kvm_proc.c b/lib/libkvm/kvm_proc.c
index 7fb85a4..071f859 100644
--- a/lib/libkvm/kvm_proc.c
+++ b/lib/libkvm/kvm_proc.c
@@ -151,7 +151,7 @@ kvm_proclist(kd, what, arg, p, bp, maxcnt)
 				kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
 			}
 			kp->ki_ngroups = ucred.cr_ngroups;
-			bcopy(ucred.cr_groups, kp->ki_groups,
+			kvm_read(kd, (u_long)ucred.cr_groups, kp->ki_groups,
 			    kp->ki_ngroups * sizeof(gid_t));
 			kp->ki_uid = ucred.cr_uid;
 			if (ucred.cr_prison != NULL) {
-- 
cgit v1.1

From 56fcdb9e459db70ae77a457a5f5f78776457ca42 Mon Sep 17 00:00:00 2001
From: marcus
Date: Sun, 4 Oct 2009 21:46:43 +0000
Subject: MFC: rev. 197681

Correct the pthread stub prototype for pthread_mutexattr_settype to
allow for the type argument. This is known to fix some
pthread_mutexattr_settype() invocations, especially when it comes to
pulseaudio.
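A typical call that the corrected stub now handles (a minimal usage
sketch):

	#include <pthread.h>

	static pthread_mutex_t mtx;

	static void
	init_recursive_mutex(void)
	{
		pthread_mutexattr_t attr;

		pthread_mutexattr_init(&attr);
		/* The second argument was silently dropped by the old
		 * one-argument stub prototype in libc. */
		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
		pthread_mutex_init(&mtx, &attr);
		pthread_mutexattr_destroy(&attr);
	}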
Approved by:	re (kib)
---
 lib/libc/gen/_pthread_stubs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib')

diff --git a/lib/libc/gen/_pthread_stubs.c b/lib/libc/gen/_pthread_stubs.c
index a3e55b0..147235e 100644
--- a/lib/libc/gen/_pthread_stubs.c
+++ b/lib/libc/gen/_pthread_stubs.c
@@ -222,7 +222,7 @@ STUB_FUNC1(pthread_mutex_trylock, PJT_MUTEX_TRYLOCK, int, void *)
 STUB_FUNC1(pthread_mutex_unlock, PJT_MUTEX_UNLOCK, int, void *)
 STUB_FUNC1(pthread_mutexattr_destroy, PJT_MUTEXATTR_DESTROY, int, void *)
 STUB_FUNC1(pthread_mutexattr_init, PJT_MUTEXATTR_INIT, int, void *)
-STUB_FUNC1(pthread_mutexattr_settype, PJT_MUTEXATTR_SETTYPE, int, void *)
+STUB_FUNC2(pthread_mutexattr_settype, PJT_MUTEXATTR_SETTYPE, int, void *, int)
 STUB_FUNC2(pthread_once, PJT_ONCE, int, void *, void *)
 STUB_FUNC1(pthread_rwlock_destroy, PJT_RWLOCK_DESTROY, int, void *)
 STUB_FUNC2(pthread_rwlock_init, PJT_RWLOCK_INIT, int, void *, void *)
-- 
cgit v1.1

From 6a88d4c4d6bd818cbd5a84db8e6a10744d7d8eb2 Mon Sep 17 00:00:00 2001
From: brueffer
Date: Fri, 9 Oct 2009 13:52:49 +0000
Subject: MFC: r197312

Fix setfib(1) section number.

Approved by:	re (kib)
---
 lib/libc/sys/setfib.2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'lib')

diff --git a/lib/libc/sys/setfib.2 b/lib/libc/sys/setfib.2
index ca08202..a65a064 100644
--- a/lib/libc/sys/setfib.2
+++ b/lib/libc/sys/setfib.2
@@ -67,8 +67,8 @@ if the
 .Fa fib
 argument is greater than the current system maximum.
 .Sh SEE ALSO
-.Xr setsockopt 2 ,
-.Xr setfib 8
+.Xr setfib 1 ,
+.Xr setsockopt 2
 .Sh STANDARDS
 The
 .Fn setfib
-- 
cgit v1.1

From 77a2c065511fd56f5abcb85492a40a871e89a6a8 Mon Sep 17 00:00:00 2001
From: kib
Date: Wed, 14 Oct 2009 14:26:19 +0000
Subject: MFC r197958:

In nanosleep(2), note that the calling thread is put to sleep, not the
whole process. Also explicitly name the parameter that specifies the
sleep interval.

Approved by:	re (kensmith)
---
 lib/libc/sys/nanosleep.2 | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'lib')

diff --git a/lib/libc/sys/nanosleep.2 b/lib/libc/sys/nanosleep.2
index 18e4c88..f50544b 100644
--- a/lib/libc/sys/nanosleep.2
+++ b/lib/libc/sys/nanosleep.2
@@ -47,7 +47,9 @@ The
 .Fn nanosleep
 system call
-causes the process to sleep for the specified time.
+causes the calling thread to sleep until the time interval specified by
+.Fa rqtp
+has elapsed.
 An unmasked signal will cause it to terminate the sleep early, regardless
 of the
 .Dv SA_RESTART
-- 
cgit v1.1

From dae6c247dba58450adb88a4f80252645b1ab6d83 Mon Sep 17 00:00:00 2001
From: kan
Date: Tue, 20 Oct 2009 19:05:43 +0000
Subject: MFC Revision 197277:

Make libc.a provide __stack_chk_fail_local weak alias. This is needed
to satisfy static libraries that are compiled with -fpic and linked
into a static binary afterwards. Several libraries in gcc are examples
of such static libs.
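For illustration, the same effect expressed in plain GCC attribute
syntax (a sketch with a stub body; the libc change itself uses the
__weak_reference() macro, as the diff below shows):

	void
	__stack_chk_fail(void)
	{
		/* The real libc routine reports the overflow and aborts. */
		for (;;)
			;
	}

	/* Weak alias: a -fpic object archived into a static library can
	 * then resolve its local stack-check symbol against static libc. */
	void __stack_chk_fail_local(void)
	    __attribute__((__weak__, __alias__("__stack_chk_fail")));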
Approved by: re (kib) --- lib/libc/sys/stack_protector.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/libc/sys/stack_protector.c b/lib/libc/sys/stack_protector.c index 63beebc..14c20eb 100644 --- a/lib/libc/sys/stack_protector.c +++ b/lib/libc/sys/stack_protector.c @@ -108,4 +108,8 @@ __chk_fail(void) __fail("buffer overflow detected; terminated"); } +#ifdef PIC __sym_compat(__stack_chk_fail_local, __stack_chk_fail, FBSD_1.0); +#else +__weak_reference(__stack_chk_fail, __stack_chk_fail_local); +#endif -- cgit v1.1 From ad58b4ba938908e46296c7a6c7314da0d0a19ca4 Mon Sep 17 00:00:00 2001 From: thompsa Date: Fri, 23 Oct 2009 12:02:01 +0000 Subject: MFC r198376 Prevent wraparound of the timeout variable. Submitted by: HPS Approved by: re (kib) --- lib/libusb/libusb20_ugen20.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libusb/libusb20_ugen20.c b/lib/libusb/libusb20_ugen20.c index 0dee793..f9f3689 100644 --- a/lib/libusb/libusb20_ugen20.c +++ b/lib/libusb/libusb20_ugen20.c @@ -800,7 +800,11 @@ ugen20_tr_submit(struct libusb20_transfer *xfer) if (xfer->flags & LIBUSB20_TRANSFER_DO_CLEAR_STALL) { fsep->flags |= USB_FS_FLAG_CLEAR_STALL; } - fsep->timeout = xfer->timeout; + /* NOTE: The "fsep->timeout" variable is 16-bit. */ + if (xfer->timeout > 65535) + fsep->timeout = 65535; + else + fsep->timeout = xfer->timeout; temp.ep_index = xfer->trIndex; -- cgit v1.1 From 88c99df1f745f2a16b992001673bdc2cb203451e Mon Sep 17 00:00:00 2001 From: mav Date: Fri, 30 Oct 2009 12:57:28 +0000 Subject: MFC rev. 197086: Add simple embedded RADIUS server support to libradius, by extending existing API, keeping backward compatibility. First consumer for this functionality is going to become forthcoming MPD-5.4, supporting CoA and DR of RFC 3576: Dynamic Authorization Extensions to RADIUS. --- lib/libradius/libradius.3 | 51 +++++- lib/libradius/radlib.c | 346 ++++++++++++++++++++++++++++++++--------- lib/libradius/radlib.h | 12 ++ lib/libradius/radlib_private.h | 15 +- 4 files changed, 334 insertions(+), 90 deletions(-) (limited to 'lib') diff --git a/lib/libradius/libradius.3 b/lib/libradius/libradius.3 index 095d6e9..9a71521 100644 --- a/lib/libradius/libradius.3 +++ b/lib/libradius/libradius.3 @@ -1,4 +1,5 @@ .\" Copyright 1998 Juniper Networks, Inc. +.\" Copyright 2009 Alexander Motin . .\" All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without @@ -24,12 +25,12 @@ .\" .\" $FreeBSD$ .\" -.Dd April 27, 2004 +.Dd August 5, 2009 .Dt LIBRADIUS 3 .Os .Sh NAME .Nm libradius -.Nd RADIUS client library +.Nd RADIUS client/server library .Sh SYNOPSIS .In radlib.h .Ft "struct rad_handle *" @@ -46,6 +47,8 @@ .Fn rad_continue_send_request "struct rad_handle *h" "int selected" "int *fd" "struct timeval *tv" .Ft int .Fn rad_create_request "struct rad_handle *h" "int code" +.Ft int +.Fn rad_create_response "struct rad_handle *h" "int code" .Ft "struct in_addr" .Fn rad_cvt_addr "const void *data" .Ft u_int32_t @@ -79,7 +82,13 @@ .Ft ssize_t .Fn rad_request_authenticator "struct rad_handle *h" "char *buf" "size_t len" .Ft int +.Fn rad_receive_request "struct rad_handle *h" +.Ft int .Fn rad_send_request "struct rad_handle *h" +.Ft int +.Fn rad_send_response "struct rad_handle *h" +.Ft "struct rad_handle *" +.Fn rad_server_open "int fd" .Ft "const char *" .Fn rad_server_secret "struct rad_handle *h" .Ft u_char * @@ -91,16 +100,17 @@ .Sh DESCRIPTION The .Nm -library implements the client side of the Remote Authentication Dial -In User Service (RADIUS). +library implements the Remote Authentication Dial In User Service (RADIUS). RADIUS, defined in RFCs 2865 and 2866, allows clients to perform authentication and accounting by means of network requests to remote servers. .Ss Initialization To use the library, an application must first call .Fn rad_auth_open -or +, .Fn rad_acct_open +or +.Fn rad_server_open to obtain a .Vt "struct rad_handle *" , which provides the context for subsequent operations. @@ -108,8 +118,10 @@ The former function is used for RADIUS authentication and the latter is used for RADIUS accounting. Calls to .Fn rad_auth_open -and +, .Fn rad_acct_open +and +.Fn rad_server_open always succeed unless insufficient virtual memory is available. If the necessary memory cannot be allocated, the functions return @@ -451,6 +463,25 @@ subsequent library calls using the same handle. .Ss Cleanup To free the resources used by the RADIUS library, call .Fn rad_close . +.Ss Server operation +Server mode operates much alike to client mode, except packet send and receieve +steps are swapped. To operate as server you should obtain server context with +.Fn rad_server_open +function, passing opened and bound UDP socket file descriptor as argument. +You should define allowed clients and their secrets using +.Fn rad_add_server +function. port, timeout and max_tries arguments are ignored in server mode. +You should call +.Fn rad_receive_request +function to receive request from client. If you do not want to block on socket +read, you are free to use any poll(), select() or non-blocking sockets for +the socket. +Received request can be parsed with same parsing functions as for client. +To respond to the request you should call +.Fn rad_create_response +and fill response content with same packet writing functions as for client. +When packet is ready, it should be sent with +.Fn rad_send_response .Sh RETURN VALUES The following functions return a non-negative value on success. If @@ -466,6 +497,8 @@ which can be retrieved using .It .Fn rad_create_request .It +.Fn rad_create_response +.It .Fn rad_get_attr .It .Fn rad_put_addr @@ -483,6 +516,8 @@ which can be retrieved using .Fn rad_continue_send_request .It .Fn rad_send_request +.It +.Fn rad_send_response .El .Pp The following functions return a @@ -499,6 +534,8 @@ without recording an error message. 
.It .Fn rad_auth_open .It +.Fn rad_server_open +.It .Fn rad_cvt_string .El .Pp @@ -553,3 +590,5 @@ subsequently added the ability to perform RADIUS accounting. Later additions and changes by .An Michael Bretterklieber . +Server mode support was added by +.An Alexander Motin . diff --git a/lib/libradius/radlib.c b/lib/libradius/radlib.c index b21447e..8faaf7e 100644 --- a/lib/libradius/radlib.c +++ b/lib/libradius/radlib.c @@ -103,7 +103,7 @@ insert_scrambled_password(struct rad_handle *h, int srv) srvp = &h->servers[srv]; padded_len = h->pass_len == 0 ? 16 : (h->pass_len+15) & ~0xf; - memcpy(md5, &h->request[POS_AUTH], LEN_AUTH); + memcpy(md5, &h->out[POS_AUTH], LEN_AUTH); for (pos = 0; pos < padded_len; pos += 16) { int i; @@ -120,49 +120,55 @@ insert_scrambled_password(struct rad_handle *h, int srv) * in calculating the scrambler for next time. */ for (i = 0; i < 16; i++) - h->request[h->pass_pos + pos + i] = + h->out[h->pass_pos + pos + i] = md5[i] ^= h->pass[pos + i]; } } static void -insert_request_authenticator(struct rad_handle *h, int srv) +insert_request_authenticator(struct rad_handle *h, int resp) { MD5_CTX ctx; const struct rad_server *srvp; - srvp = &h->servers[srv]; + srvp = &h->servers[h->srv]; /* Create the request authenticator */ MD5Init(&ctx); - MD5Update(&ctx, &h->request[POS_CODE], POS_AUTH - POS_CODE); - MD5Update(&ctx, memset(&h->request[POS_AUTH], 0, LEN_AUTH), LEN_AUTH); - MD5Update(&ctx, &h->request[POS_ATTRS], h->req_len - POS_ATTRS); + MD5Update(&ctx, &h->out[POS_CODE], POS_AUTH - POS_CODE); + if (resp) + MD5Update(&ctx, &h->in[POS_AUTH], LEN_AUTH); + else + MD5Update(&ctx, &h->out[POS_AUTH], LEN_AUTH); + MD5Update(&ctx, &h->out[POS_ATTRS], h->out_len - POS_ATTRS); MD5Update(&ctx, srvp->secret, strlen(srvp->secret)); - MD5Final(&h->request[POS_AUTH], &ctx); + MD5Final(&h->out[POS_AUTH], &ctx); } static void -insert_message_authenticator(struct rad_handle *h, int srv) +insert_message_authenticator(struct rad_handle *h, int resp) { #ifdef WITH_SSL u_char md[EVP_MAX_MD_SIZE]; u_int md_len; const struct rad_server *srvp; HMAC_CTX ctx; - srvp = &h->servers[srv]; + srvp = &h->servers[h->srv]; if (h->authentic_pos != 0) { HMAC_CTX_init(&ctx); HMAC_Init(&ctx, srvp->secret, strlen(srvp->secret), EVP_md5()); - HMAC_Update(&ctx, &h->request[POS_CODE], POS_AUTH - POS_CODE); - HMAC_Update(&ctx, &h->request[POS_AUTH], LEN_AUTH); - HMAC_Update(&ctx, &h->request[POS_ATTRS], - h->req_len - POS_ATTRS); + HMAC_Update(&ctx, &h->out[POS_CODE], POS_AUTH - POS_CODE); + if (resp) + HMAC_Update(&ctx, &h->in[POS_AUTH], LEN_AUTH); + else + HMAC_Update(&ctx, &h->out[POS_AUTH], LEN_AUTH); + HMAC_Update(&ctx, &h->out[POS_ATTRS], + h->out_len - POS_ATTRS); HMAC_Final(&ctx, md, &md_len); HMAC_CTX_cleanup(&ctx); HMAC_cleanup(&ctx); - memcpy(&h->request[h->authentic_pos + 2], md, md_len); + memcpy(&h->out[h->authentic_pos + 2], md, md_len); } #endif } @@ -195,20 +201,20 @@ is_valid_response(struct rad_handle *h, int srv, return 0; /* Check the message length */ - if (h->resp_len < POS_ATTRS) + if (h->in_len < POS_ATTRS) return 0; - len = h->response[POS_LENGTH] << 8 | h->response[POS_LENGTH+1]; - if (len > h->resp_len) + len = h->in[POS_LENGTH] << 8 | h->in[POS_LENGTH+1]; + if (len > h->in_len) return 0; /* Check the response authenticator */ MD5Init(&ctx); - MD5Update(&ctx, &h->response[POS_CODE], POS_AUTH - POS_CODE); - MD5Update(&ctx, &h->request[POS_AUTH], LEN_AUTH); - MD5Update(&ctx, &h->response[POS_ATTRS], len - POS_ATTRS); + MD5Update(&ctx, &h->in[POS_CODE], POS_AUTH - POS_CODE); + 
MD5Update(&ctx, &h->out[POS_AUTH], LEN_AUTH); + MD5Update(&ctx, &h->in[POS_ATTRS], len - POS_ATTRS); MD5Update(&ctx, srvp->secret, strlen(srvp->secret)); MD5Final(md5, &ctx); - if (memcmp(&h->response[POS_AUTH], md5, sizeof md5) != 0) + if (memcmp(&h->in[POS_AUTH], md5, sizeof md5) != 0) return 0; #ifdef WITH_SSL @@ -216,42 +222,111 @@ is_valid_response(struct rad_handle *h, int srv, * For non accounting responses check the message authenticator, * if any. */ - if (h->response[POS_CODE] != RAD_ACCOUNTING_RESPONSE) { + if (h->in[POS_CODE] != RAD_ACCOUNTING_RESPONSE) { - memcpy(resp, h->response, MSGSIZE); + memcpy(resp, h->in, MSGSIZE); pos = POS_ATTRS; /* Search and verify the Message-Authenticator */ while (pos < len - 2) { - if (h->response[pos] == RAD_MESSAGE_AUTHENTIC) { + if (h->in[pos] == RAD_MESSAGE_AUTHENTIC) { /* zero fill the Message-Authenticator */ memset(&resp[pos + 2], 0, MD5_DIGEST_LENGTH); HMAC_CTX_init(&hctx); HMAC_Init(&hctx, srvp->secret, strlen(srvp->secret), EVP_md5()); - HMAC_Update(&hctx, &h->response[POS_CODE], + HMAC_Update(&hctx, &h->in[POS_CODE], POS_AUTH - POS_CODE); - HMAC_Update(&hctx, &h->request[POS_AUTH], + HMAC_Update(&hctx, &h->out[POS_AUTH], LEN_AUTH); HMAC_Update(&hctx, &resp[POS_ATTRS], - h->resp_len - POS_ATTRS); + h->in_len - POS_ATTRS); HMAC_Final(&hctx, md, &md_len); HMAC_CTX_cleanup(&hctx); HMAC_cleanup(&hctx); - if (memcmp(md, &h->response[pos + 2], + if (memcmp(md, &h->in[pos + 2], MD5_DIGEST_LENGTH) != 0) return 0; break; } - pos += h->response[pos + 1]; + pos += h->in[pos + 1]; } } #endif return 1; } +/* + * Return true if the current request is valid for the specified server. + */ +static int +is_valid_request(struct rad_handle *h) +{ + MD5_CTX ctx; + unsigned char md5[MD5_DIGEST_LENGTH]; + const struct rad_server *srvp; + int len; +#ifdef WITH_SSL + HMAC_CTX hctx; + u_char resp[MSGSIZE], md[EVP_MAX_MD_SIZE]; + u_int md_len; + int pos; +#endif + + srvp = &h->servers[h->srv]; + + /* Check the message length */ + if (h->in_len < POS_ATTRS) + return (0); + len = h->in[POS_LENGTH] << 8 | h->in[POS_LENGTH+1]; + if (len > h->in_len) + return (0); + + if (h->in[POS_CODE] != RAD_ACCESS_REQUEST) { + uint32_t zeroes[4] = { 0, 0, 0, 0 }; + /* Check the request authenticator */ + MD5Init(&ctx); + MD5Update(&ctx, &h->in[POS_CODE], POS_AUTH - POS_CODE); + MD5Update(&ctx, zeroes, LEN_AUTH); + MD5Update(&ctx, &h->in[POS_ATTRS], len - POS_ATTRS); + MD5Update(&ctx, srvp->secret, strlen(srvp->secret)); + MD5Final(md5, &ctx); + if (memcmp(&h->in[POS_AUTH], md5, sizeof md5) != 0) + return (0); + } + +#ifdef WITH_SSL + /* Search and verify the Message-Authenticator */ + pos = POS_ATTRS; + while (pos < len - 2) { + if (h->in[pos] == RAD_MESSAGE_AUTHENTIC) { + memcpy(resp, h->in, MSGSIZE); + /* zero fill the Request-Authenticator */ + if (h->in[POS_CODE] != RAD_ACCESS_REQUEST) + memset(&resp[POS_AUTH], 0, LEN_AUTH); + /* zero fill the Message-Authenticator */ + memset(&resp[pos + 2], 0, MD5_DIGEST_LENGTH); + + HMAC_CTX_init(&hctx); + HMAC_Init(&hctx, srvp->secret, + strlen(srvp->secret), EVP_md5()); + HMAC_Update(&hctx, resp, h->in_len); + HMAC_Final(&hctx, md, &md_len); + HMAC_CTX_cleanup(&hctx); + HMAC_cleanup(&hctx); + if (memcmp(md, &h->in[pos + 2], + MD5_DIGEST_LENGTH) != 0) + return (0); + break; + } + pos += h->in[pos + 1]; + } +#endif + return (1); +} + static int put_password_attr(struct rad_handle *h, int type, const void *value, size_t len) { @@ -273,7 +348,7 @@ put_password_attr(struct rad_handle *h, int type, const void *value, size_t len) */ 
clear_password(h); put_raw_attr(h, type, h->pass, padded_len); - h->pass_pos = h->req_len - padded_len; + h->pass_pos = h->out_len - padded_len; /* Save the cleartext password, padded as necessary */ memcpy(h->pass, value, len); @@ -289,14 +364,14 @@ put_raw_attr(struct rad_handle *h, int type, const void *value, size_t len) generr(h, "Attribute too long"); return -1; } - if (h->req_len + 2 + len > MSGSIZE) { + if (h->out_len + 2 + len > MSGSIZE) { generr(h, "Maximum message length exceeded"); return -1; } - h->request[h->req_len++] = type; - h->request[h->req_len++] = len + 2; - memcpy(&h->request[h->req_len], value, len); - h->req_len += len; + h->out[h->out_len++] = type; + h->out[h->out_len++] = len + 2; + memcpy(&h->out[h->out_len], value, len); + h->out_len += len; return 0; } @@ -523,22 +598,26 @@ rad_continue_send_request(struct rad_handle *h, int selected, int *fd, { int n; + if (h->type == RADIUS_SERVER) { + generr(h, "denied function call"); + return (-1); + } if (selected) { struct sockaddr_in from; socklen_t fromlen; fromlen = sizeof from; - h->resp_len = recvfrom(h->fd, h->response, + h->in_len = recvfrom(h->fd, h->in, MSGSIZE, MSG_WAITALL, (struct sockaddr *)&from, &fromlen); - if (h->resp_len == -1) { + if (h->in_len == -1) { generr(h, "recvfrom: %s", strerror(errno)); return -1; } if (is_valid_response(h, h->srv, &from)) { - h->resp_len = h->response[POS_LENGTH] << 8 | - h->response[POS_LENGTH+1]; - h->resp_pos = POS_ATTRS; - return h->response[POS_CODE]; + h->in_len = h->in[POS_LENGTH] << 8 | + h->in[POS_LENGTH+1]; + h->in_pos = POS_ATTRS; + return h->in[POS_CODE]; } } @@ -556,21 +635,22 @@ rad_continue_send_request(struct rad_handle *h, int selected, int *fd, if (++h->srv >= h->num_servers) h->srv = 0; - if (h->request[POS_CODE] == RAD_ACCOUNTING_REQUEST) - /* Insert the request authenticator into the request */ - insert_request_authenticator(h, h->srv); - else + if (h->out[POS_CODE] == RAD_ACCESS_REQUEST) { /* Insert the scrambled password into the request */ if (h->pass_pos != 0) insert_scrambled_password(h, h->srv); - - insert_message_authenticator(h, h->srv); + } + insert_message_authenticator(h, 0); + if (h->out[POS_CODE] != RAD_ACCESS_REQUEST) { + /* Insert the request authenticator into the request */ + insert_request_authenticator(h, h->srv); + } /* Send the request */ - n = sendto(h->fd, h->request, h->req_len, 0, + n = sendto(h->fd, h->out, h->out_len, 0, (const struct sockaddr *)&h->servers[h->srv].addr, sizeof h->servers[h->srv].addr); - if (n != h->req_len) { + if (n != h->out_len) { if (n == -1) generr(h, "sendto: %s", strerror(errno)); else @@ -588,22 +668,117 @@ rad_continue_send_request(struct rad_handle *h, int selected, int *fd, } int +rad_receive_request(struct rad_handle *h) +{ + struct sockaddr_in from; + socklen_t fromlen; + int n; + + if (h->type != RADIUS_SERVER) { + generr(h, "denied function call"); + return (-1); + } + h->srv = -1; + fromlen = sizeof(from); + h->in_len = recvfrom(h->fd, h->in, + MSGSIZE, MSG_WAITALL, (struct sockaddr *)&from, &fromlen); + if (h->in_len == -1) { + generr(h, "recvfrom: %s", strerror(errno)); + return (-1); + } + for (n = 0; n < h->num_servers; n++) { + if (h->servers[n].addr.sin_addr.s_addr == from.sin_addr.s_addr) { + h->servers[n].addr.sin_port = from.sin_port; + h->srv = n; + break; + } + } + if (h->srv == -1) + return (-2); + if (is_valid_request(h)) { + h->in_len = h->in[POS_LENGTH] << 8 | + h->in[POS_LENGTH+1]; + h->in_pos = POS_ATTRS; + return (h->in[POS_CODE]); + } + return (-3); +} + +int 
+rad_send_response(struct rad_handle *h) +{ + int n; + + if (h->type != RADIUS_SERVER) { + generr(h, "denied function call"); + return (-1); + } + /* Fill in the length field in the message */ + h->out[POS_LENGTH] = h->out_len >> 8; + h->out[POS_LENGTH+1] = h->out_len; + + insert_message_authenticator(h, + (h->in[POS_CODE] == RAD_ACCESS_REQUEST) ? 1 : 0); + insert_request_authenticator(h, 1); + + /* Send the request */ + n = sendto(h->fd, h->out, h->out_len, 0, + (const struct sockaddr *)&h->servers[h->srv].addr, + sizeof h->servers[h->srv].addr); + if (n != h->out_len) { + if (n == -1) + generr(h, "sendto: %s", strerror(errno)); + else + generr(h, "sendto: short write"); + return -1; + } + + return 0; +} + +int rad_create_request(struct rad_handle *h, int code) { int i; - h->request[POS_CODE] = code; - h->request[POS_IDENT] = ++h->ident; - /* Create a random authenticator */ - for (i = 0; i < LEN_AUTH; i += 2) { - long r; - r = random(); - h->request[POS_AUTH+i] = (u_char)r; - h->request[POS_AUTH+i+1] = (u_char)(r >> 8); + if (h->type == RADIUS_SERVER) { + generr(h, "denied function call"); + return (-1); + } + h->out[POS_CODE] = code; + h->out[POS_IDENT] = ++h->ident; + if (code == RAD_ACCESS_REQUEST) { + /* Create a random authenticator */ + for (i = 0; i < LEN_AUTH; i += 2) { + long r; + r = random(); + h->out[POS_AUTH+i] = (u_char)r; + h->out[POS_AUTH+i+1] = (u_char)(r >> 8); + } + } else + memset(&h->out[POS_AUTH], 0, LEN_AUTH); + h->out_len = POS_ATTRS; + clear_password(h); + h->authentic_pos = 0; + h->out_created = 1; + return 0; +} + +int +rad_create_response(struct rad_handle *h, int code) +{ + + if (h->type != RADIUS_SERVER) { + generr(h, "denied function call"); + return (-1); } - h->req_len = POS_ATTRS; + h->out[POS_CODE] = code; + h->out[POS_IDENT] = h->in[POS_IDENT]; + memset(&h->out[POS_AUTH], 0, LEN_AUTH); + h->out_len = POS_ATTRS; clear_password(h); - h->request_created = 1; + h->authentic_pos = 0; + h->out_created = 1; return 0; } @@ -647,20 +822,20 @@ rad_get_attr(struct rad_handle *h, const void **value, size_t *len) { int type; - if (h->resp_pos >= h->resp_len) + if (h->in_pos >= h->in_len) return 0; - if (h->resp_pos + 2 > h->resp_len) { + if (h->in_pos + 2 > h->in_len) { generr(h, "Malformed attribute in response"); return -1; } - type = h->response[h->resp_pos++]; - *len = h->response[h->resp_pos++] - 2; - if (h->resp_pos + (int)*len > h->resp_len) { + type = h->in[h->in_pos++]; + *len = h->in[h->in_pos++] - 2; + if (h->in_pos + (int)*len > h->in_len) { generr(h, "Malformed attribute in response"); return -1; } - *value = &h->response[h->resp_pos]; - h->resp_pos += *len; + *value = &h->in[h->in_pos]; + h->in_pos += *len; return type; } @@ -672,6 +847,10 @@ rad_init_send_request(struct rad_handle *h, int *fd, struct timeval *tv) { int srv; + if (h->type == RADIUS_SERVER) { + generr(h, "denied function call"); + return (-1); + } /* Make sure we have a socket to use */ if (h->fd == -1) { struct sockaddr_in sin; @@ -694,7 +873,7 @@ rad_init_send_request(struct rad_handle *h, int *fd, struct timeval *tv) } } - if (h->request[POS_CODE] == RAD_ACCOUNTING_REQUEST) { + if (h->out[POS_CODE] != RAD_ACCESS_REQUEST) { /* Make sure no password given */ if (h->pass_pos || h->chap_pass) { generr(h, "User or Chap Password" @@ -718,8 +897,8 @@ rad_init_send_request(struct rad_handle *h, int *fd, struct timeval *tv) } /* Fill in the length field in the message */ - h->request[POS_LENGTH] = h->req_len >> 8; - h->request[POS_LENGTH+1] = h->req_len; + h->out[POS_LENGTH] = h->out_len >> 
8; + h->out[POS_LENGTH+1] = h->out_len; /* * Count the total number of tries we will make, and zero the @@ -763,7 +942,7 @@ rad_auth_open(void) h->chap_pass = 0; h->authentic_pos = 0; h->type = RADIUS_AUTH; - h->request_created = 0; + h->out_created = 0; h->eap_msg = 0; } return h; @@ -781,6 +960,19 @@ rad_acct_open(void) } struct rad_handle * +rad_server_open(int fd) +{ + struct rad_handle *h; + + h = rad_open(); + if (h != NULL) { + h->type = RADIUS_SERVER; + h->fd = fd; + } + return h; +} + +struct rad_handle * rad_open(void) { return rad_auth_open(); @@ -797,13 +989,13 @@ rad_put_attr(struct rad_handle *h, int type, const void *value, size_t len) { int result; - if (!h->request_created) { + if (!h->out_created) { generr(h, "Please call rad_create_request()" " before putting attributes"); return -1; } - if (h->request[POS_CODE] == RAD_ACCOUNTING_REQUEST) { + if (h->out[POS_CODE] == RAD_ACCOUNTING_REQUEST) { if (type == RAD_EAP_MESSAGE) { generr(h, "EAP-Message attribute is not valid" " in accounting requests"); @@ -858,14 +1050,14 @@ rad_put_message_authentic(struct rad_handle *h) #ifdef WITH_SSL u_char md_zero[MD5_DIGEST_LENGTH]; - if (h->request[POS_CODE] == RAD_ACCOUNTING_REQUEST) { + if (h->out[POS_CODE] == RAD_ACCOUNTING_REQUEST) { generr(h, "Message-Authenticator is not valid" " in accounting requests"); return -1; } if (h->authentic_pos == 0) { - h->authentic_pos = h->req_len; + h->authentic_pos = h->out_len; memset(md_zero, 0, sizeof(md_zero)); return (put_raw_attr(h, RAD_MESSAGE_AUTHENTIC, md_zero, sizeof(md_zero))); @@ -1041,7 +1233,7 @@ rad_put_vendor_attr(struct rad_handle *h, int vendor, int type, struct vendor_attribute *attr; int res; - if (!h->request_created) { + if (!h->out_created) { generr(h, "Please call rad_create_request()" " before putting attributes"); return -1; @@ -1088,7 +1280,7 @@ rad_request_authenticator(struct rad_handle *h, char *buf, size_t len) { if (len < LEN_AUTH) return (-1); - memcpy(buf, h->request + POS_AUTH, LEN_AUTH); + memcpy(buf, h->out + POS_AUTH, LEN_AUTH); if (len > LEN_AUTH) buf[LEN_AUTH] = '\0'; return (LEN_AUTH); diff --git a/lib/libradius/radlib.h b/lib/libradius/radlib.h index 2c42c3a..b26be41 100644 --- a/lib/libradius/radlib.h +++ b/lib/libradius/radlib.h @@ -42,6 +42,12 @@ #define RAD_ACCOUNTING_REQUEST 4 #define RAD_ACCOUNTING_RESPONSE 5 #define RAD_ACCESS_CHALLENGE 11 +#define RAD_DISCONNECT_REQUEST 40 +#define RAD_DISCONNECT_ACK 41 +#define RAD_DISCONNECT_NAK 42 +#define RAD_COA_REQUEST 43 +#define RAD_COA_ACK 44 +#define RAD_COA_NAK 45 /* Attribute types and values */ #define RAD_USER_NAME 1 /* String */ @@ -179,6 +185,8 @@ #define RAD_ACCT_MULTI_SESSION_ID 50 /* String */ #define RAD_ACCT_LINK_COUNT 51 /* Integer */ +#define RAD_ERROR_CAUSE 101 /* Integer */ + struct rad_handle; struct timeval; @@ -192,6 +200,7 @@ int rad_config(struct rad_handle *, const char *); int rad_continue_send_request(struct rad_handle *, int, int *, struct timeval *); int rad_create_request(struct rad_handle *, int); +int rad_create_response(struct rad_handle *, int); struct in_addr rad_cvt_addr(const void *); u_int32_t rad_cvt_int(const void *); char *rad_cvt_string(const void *, size_t); @@ -209,7 +218,10 @@ int rad_put_string(struct rad_handle *, int, int rad_put_message_authentic(struct rad_handle *); ssize_t rad_request_authenticator(struct rad_handle *, char *, size_t); +int rad_receive_request(struct rad_handle *); int rad_send_request(struct rad_handle *); +int rad_send_response(struct rad_handle *); +struct rad_handle *rad_server_open(int 
fd); const char *rad_server_secret(struct rad_handle *); const char *rad_strerror(struct rad_handle *); u_char *rad_demangle(struct rad_handle *, const void *, diff --git a/lib/libradius/radlib_private.h b/lib/libradius/radlib_private.h index d323cbd..d886c74 100644 --- a/lib/libradius/radlib_private.h +++ b/lib/libradius/radlib_private.h @@ -38,6 +38,7 @@ /* Handle types */ #define RADIUS_AUTH 0 /* RADIUS authentication, default */ #define RADIUS_ACCT 1 /* RADIUS accounting */ +#define RADIUS_SERVER 2 /* RADIUS server */ /* Defaults */ #define MAXTRIES 3 @@ -75,18 +76,18 @@ struct rad_handle { int num_servers; /* Number of valid server entries */ int ident; /* Current identifier value */ char errmsg[ERRSIZE]; /* Most recent error message */ - unsigned char request[MSGSIZE]; /* Request to send */ - char request_created; /* rad_create_request() called? */ - int req_len; /* Length of request */ + unsigned char out[MSGSIZE]; /* Request to send */ + char out_created; /* rad_create_request() called? */ + int out_len; /* Length of request */ char pass[PASSSIZE]; /* Cleartext password */ int pass_len; /* Length of cleartext password */ int pass_pos; /* Position of scrambled password */ - char chap_pass; /* Have we got a CHAP_PASSWORD ? */ + char chap_pass; /* Have we got a CHAP_PASSWORD ? */ int authentic_pos; /* Position of message authenticator */ char eap_msg; /* Are we an EAP Proxy? */ - unsigned char response[MSGSIZE]; /* Response received */ - int resp_len; /* Length of response */ - int resp_pos; /* Current position scanning attrs */ + unsigned char in[MSGSIZE]; /* Response received */ + int in_len; /* Length of response */ + int in_pos; /* Current position scanning attrs */ int total_tries; /* How many requests we'll send */ int try; /* How many requests we've sent */ int srv; /* Server number we did last */ -- cgit v1.1
From f8c10fe31db266a7800e82230f6a563e1b5435ea Mon Sep 17 00:00:00 2001 From: mav Date: Fri, 30 Oct 2009 13:02:08 +0000 Subject: MFC rev. 197621: Fix a bug where the RADIUS client gave up after a single sendto() error without trying the backup servers. PR: kern/103764, misc/139214 --- lib/libradius/radlib.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/libradius/radlib.c b/lib/libradius/radlib.c index 8faaf7e..e4e4a94 100644 --- a/lib/libradius/radlib.c +++ b/lib/libradius/radlib.c @@ -650,17 +650,12 @@ rad_continue_send_request(struct rad_handle *h, int selected, int *fd, n = sendto(h->fd, h->out, h->out_len, 0, (const struct sockaddr *)&h->servers[h->srv].addr, sizeof h->servers[h->srv].addr); - if (n != h->out_len) { - if (n == -1) - generr(h, "sendto: %s", strerror(errno)); - else - generr(h, "sendto: short write"); - return -1; - } - + if (n != h->out_len) + tv->tv_sec = 1; /* Do not wait full timeout if send failed. */ + else + tv->tv_sec = h->servers[h->srv].timeout; h->try++; h->servers[h->srv].num_tries++; - tv->tv_sec = h->servers[h->srv].timeout; tv->tv_usec = 0; *fd = h->fd; -- cgit v1.1
From 250abcae59abf84b718b892517af472e077c3a03 Mon Sep 17 00:00:00 2001 From: edwin Date: Sat, 31 Oct 2009 06:35:40 +0000 Subject: MFCs of r197764, r197765, r197766, r197847: Modified locale(1) to be able to show the altmon_X fields and the [cxX]_fmt's. Also modify the "-k list" option to display only fields with a certain prefix.
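For example, with this merged an application can query the standalone month names directly through nl_langinfo(3); a minimal sketch, assuming a locale whose LC_TIME data provides alt_month entries (the locale name below is only an example):

    #include <langinfo.h>
    #include <locale.h>
    #include <stdio.h>

    int
    main(void)
    {
            /* ALTMON_* are FreeBSD-specific nl_item values. */
            if (setlocale(LC_TIME, "ru_RU.UTF-8") == NULL)
                    return (1);
            printf("%s\n", nl_langinfo(ALTMON_1));
            return (0);
    }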
Add the comment "(FreeBSD only)" to the altmonth_x keywords --- lib/libc/locale/nl_langinfo.c | 6 ++++++ lib/libc/stdtime/localtime.c | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/locale/nl_langinfo.c b/lib/libc/locale/nl_langinfo.c index 9bd5082..26ca025 100644 --- a/lib/libc/locale/nl_langinfo.c +++ b/lib/libc/locale/nl_langinfo.c @@ -93,6 +93,12 @@ nl_langinfo(nl_item item) case ABMON_9: case ABMON_10: case ABMON_11: case ABMON_12: ret = (char*) __get_current_time_locale()->mon[_REL(ABMON_1)]; break; + case ALTMON_1: case ALTMON_2: case ALTMON_3: case ALTMON_4: + case ALTMON_5: case ALTMON_6: case ALTMON_7: case ALTMON_8: + case ALTMON_9: case ALTMON_10: case ALTMON_11: case ALTMON_12: + ret = (char*) + __get_current_time_locale()->alt_month[_REL(ALTMON_1)]; + break; case ERA: /* XXX: need to be implemented */ ret = ""; diff --git a/lib/libc/stdtime/localtime.c b/lib/libc/stdtime/localtime.c index e0ed73f..9fc5f3e 100644 --- a/lib/libc/stdtime/localtime.c +++ b/lib/libc/stdtime/localtime.c @@ -21,6 +21,7 @@ __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include +#include #include #include #include "private.h" @@ -1413,13 +1414,16 @@ const time_t * const timep; static pthread_mutex_t localtime_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t localtime_key = -1; struct tm *p_tm; + int r; if (__isthreaded != 0) { if (localtime_key < 0) { _pthread_mutex_lock(&localtime_mutex); if (localtime_key < 0) { - if (_pthread_key_create(&localtime_key, free) < 0) { + if ((r = _pthread_key_create(&localtime_key, + free)) != 0) { _pthread_mutex_unlock(&localtime_mutex); + errno = r; return(NULL); } } @@ -1512,13 +1516,16 @@ const time_t * const timep; static pthread_mutex_t gmtime_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t gmtime_key = -1; struct tm *p_tm; + int r; if (__isthreaded != 0) { if (gmtime_key < 0) { _pthread_mutex_lock(&gmtime_mutex); if (gmtime_key < 0) { - if (_pthread_key_create(&gmtime_key, free) < 0) { + if ((r = _pthread_key_create(&gmtime_key, + free)) != 0) { _pthread_mutex_unlock(&gmtime_mutex); + errno = r; return(NULL); } } -- cgit v1.1 From 8aa1396550009dbbb910c8802b9f340d933c5a29 Mon Sep 17 00:00:00 2001 From: sson Date: Sat, 31 Oct 2009 21:22:18 +0000 Subject: MFC 197240,197241,197242,197243,197293,197294,197407: Add EVFILT_USER filter and EV_DISPATCH/EV_RECEIPT flags to kevent(2). Approved by: rwatson (mentor) --- lib/libc/sys/kqueue.2 | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/sys/kqueue.2 b/lib/libc/sys/kqueue.2 index 119ff79..e899a1b 100644 --- a/lib/libc/sys/kqueue.2 +++ b/lib/libc/sys/kqueue.2 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 6, 2007 +.Dd September 15, 2009 .Dt KQUEUE 2 .Os .Sh NAME @@ -201,11 +201,25 @@ Disable the event so .Fn kevent will not return it. The filter itself is not disabled. +.It EV_DISPATCH +Disable the event source immediately after delivery of an event. +See +.Dv EV_DISABLE +above. .It EV_DELETE Removes the event from the kqueue. Events which are attached to file descriptors are automatically deleted on the last close of the descriptor. +.It EV_RECEIPT +This flag is useful for making bulk changes to a kqueue without draining +any pending events. +When passed as input, it forces +.Dv EV_ERROR +to always be returned. +When a filter is successfully added the +.Va data +field will be zero. 
.It EV_ONESHOT Causes the event to return only the first occurrence of the filter being triggered. @@ -441,6 +455,44 @@ The link state is invalid. On return, .Va fflags contains the events which triggered the filter. +.It Dv EVFILT_USER +Establishes a user event identified by +.Va ident +which is not assosicated with any kernel mechanism but is triggered by +user level code. +The lower 24 bits of the +.Va fflags +may be used for user defined flags and manipulated using the following: +.Bl -tag -width XXNOTE_FFLAGSMASK +.It Dv NOTE_FFNOP +Ignore the input +.Va fflags . +.It Dv NOTE_FFAND +Bitwise AND +.Va fflags . +.It Dv NOTE_FFOR +Bitwise OR +.Va fflags . +.It Dv NOTE_COPY +Copy +.Va fflags . +.It Dv NOTE_FFCTRLMASK +Control mask for +.Va fflags . +.It Dv NOTE_FFLAGSMASK +User defined flag mask for +.Va fflags . +.El +.Pp +A user event is triggered for output with the following: +.Bl -tag -width XXNOTE_FFLAGSMASK +.It Dv NOTE_TRIGGER +Cause the event to be triggered. +.El +.Pp +On return, +.Va fflags +contains the users defined flags in the lower 24 bits. .El .Sh RETURN VALUES The -- cgit v1.1 From 3f9761f9fd2e480b15ee8885d6684e9ef6d6ab63 Mon Sep 17 00:00:00 2001 From: alc Date: Sun, 1 Nov 2009 19:22:07 +0000 Subject: MFC r197331, r197394 Add getpagesizes(3). --- lib/libc/gen/Makefile.inc | 6 +-- lib/libc/gen/Symbol.map | 4 ++ lib/libc/gen/getpagesizes.3 | 99 +++++++++++++++++++++++++++++++++++++++++++++ lib/libc/gen/getpagesizes.c | 78 +++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 lib/libc/gen/getpagesizes.3 create mode 100644 lib/libc/gen/getpagesizes.c (limited to 'lib') diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 8837038..b06f846 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -15,7 +15,7 @@ SRCS+= __getosreldate.c __xuname.c \ getbootfile.c getbsize.c \ getcap.c getcwd.c getdomainname.c getgrent.c getgrouplist.c \ gethostname.c getloadavg.c getlogin.c getmntinfo.c getnetgrent.c \ - getosreldate.c getpagesize.c \ + getosreldate.c getpagesize.c getpagesizes.c \ getpeereid.c getprogname.c getpwent.c getttyent.c \ getusershell.c getvfsbyname.c glob.c \ initgroups.c isatty.c isinf.c isnan.c jrand48.c lcong48.c \ @@ -51,8 +51,8 @@ MAN+= alarm.3 arc4random.3 \ getbootfile.3 getbsize.3 getcap.3 getcontext.3 getcwd.3 \ getdiskbyname.3 getdomainname.3 getfsent.3 \ getgrent.3 getgrouplist.3 gethostname.3 getloadavg.3 \ - getmntinfo.3 getnetgrent.3 getosreldate.3 \ - getpagesize.3 getpass.3 getpeereid.3 getprogname.3 getpwent.3 \ + getmntinfo.3 getnetgrent.3 getosreldate.3 getpagesize.3 \ + getpagesizes.3 getpass.3 getpeereid.3 getprogname.3 getpwent.3 \ getttyent.3 getusershell.3 getvfsbyname.3 \ glob.3 initgroups.3 isgreater.3 ldexp.3 lockf.3 makecontext.3 \ modf.3 \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 4f1efaf..197430a 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -366,6 +366,10 @@ FBSD_1.1 { tcsetsid; }; +FBSD_1.2 { + getpagesizes; +}; + FBSDprivate_1.0 { /* needed by thread libraries */ __thr_jtable; diff --git a/lib/libc/gen/getpagesizes.3 b/lib/libc/gen/getpagesizes.3 new file mode 100644 index 0000000..959df81 --- /dev/null +++ b/lib/libc/gen/getpagesizes.3 @@ -0,0 +1,99 @@ +.\" Copyright (c) 2009 Alan L. Cox +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 21, 2009 +.Dt GETPAGESIZES 3 +.Os +.Sh NAME +.Nm getpagesizes +.Nd "get system page sizes" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/mman.h +.Ft int +.Fn getpagesizes "size_t pagesize[]" "int nelem" +.Sh DESCRIPTION +The +.Fn getpagesizes +function retrieves page size information from the system. +When it is called with +.Fa pagesize +specified as +.Dv NULL +and +.Fa nelem +specified as 0, it returns the number of distinct page sizes that are +supported by the system. +Otherwise, it assigns up to +.Fa nelem +of the system-supported page sizes to consecutive elements of the +array referenced by +.Fa pagesize . +These page sizes are expressed in bytes. +In this case, +.Fn getpagesizes +returns the number of such page sizes that it assigned to the array. +.Sh RETURN VALUES +If successful, the +.Fn getpagesizes +function returns either the number of page sizes that are supported by +the system or the number of supported page sizes that it assigned to +the array referenced by +.Fa pagesize . +Otherwise, it returns the value\~\-1 and sets +.Va errno +to indicate the error. +.Sh ERRORS +The +.Fn getpagesizes +function will succeed unless: +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa pagesize +argument is +.Dv NULL +and the +.Fa nelem +argument is non-zero. +.It Bq Er EINVAL +The +.Fa nelem +argument is less than zero. +.El +.Sh SEE ALSO +.Xr getpagesize 3 +.Sh HISTORY +The +.Fn getpagesizes +function first appeared in Solaris 9. +This manual page was written in conjunction with a new but compatible +implementation that was first released in +.Fx 7.3 . +.Sh AUTHORS +This manual page was written by +.An Alan L. Cox Aq alc@cs.rice.edu . diff --git a/lib/libc/gen/getpagesizes.c b/lib/libc/gen/getpagesizes.c new file mode 100644 index 0000000..b0de939 --- /dev/null +++ b/lib/libc/gen/getpagesizes.c @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 2009 Alan L. Cox + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +/* + * Retrieves page size information from the system. Specifically, returns the + * number of distinct page sizes that are supported by the system, if + * "pagesize" is NULL and "nelem" is 0. Otherwise, assigns up to "nelem" of + * the system-supported page sizes to consecutive elements of the array + * referenced by "pagesize", and returns the number of such page sizes that it + * assigned to the array. These page sizes are expressed in bytes. + * + * The implementation of this function does not directly or indirectly call + * malloc(3) or any other dynamic memory allocator that may itself call this + * function. + */ +int +getpagesizes(size_t pagesize[], int nelem) +{ + static u_long ps[MAXPAGESIZES]; + static int nops; + size_t size; + int i; + + if (nelem < 0 || (nelem > 0 && pagesize == NULL)) { + errno = EINVAL; + return (-1); + } + /* Cache the result of the sysctl(2). */ + if (nops == 0) { + size = sizeof(ps); + if (sysctlbyname("hw.pagesizes", ps, &size, NULL, 0) == -1) + return (-1); + /* Count the number of page sizes that are supported. */ + nops = size / sizeof(ps[0]); + while (nops > 0 && ps[nops - 1] == 0) + nops--; + } + if (pagesize == NULL) + return (nops); + /* Return up to "nelem" page sizes from the cached result. */ + if (nelem > nops) + nelem = nops; + for (i = 0; i < nelem; i++) + pagesize[i] = ps[i]; + return (nelem); +} -- cgit v1.1 From ae23acf799f18392a73bb2f885e0a8e47e358720 Mon Sep 17 00:00:00 2001 From: alc Date: Sun, 1 Nov 2009 20:24:17 +0000 Subject: MFC r197163 Add the FBSD_1.2 namespace. Reminded by: kib --- lib/libc/Versions.def | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/Versions.def b/lib/libc/Versions.def index a17e0a5..da0ca6f 100644 --- a/lib/libc/Versions.def +++ b/lib/libc/Versions.def @@ -1,5 +1,11 @@ # $FreeBSD$ +# +# Note: Whenever bumping the FBSD version, always make +# FBSDprivate_1.0 depend on the new FBSD version. +# This will keep it at the end of the dependency chain. +# + # This is our first version; it depends on no other. # This version was first added to 7.0-current. FBSD_1.0 { @@ -9,6 +15,10 @@ FBSD_1.0 { FBSD_1.1 { } FBSD_1.0; +# This version was first added to 9.0-current. +FBSD_1.2 { +} FBSD_1.1; + # This is our private namespace. Any global interfaces that are # strictly for use only by other FreeBSD applications and libraries # are listed here. 
We use a separate namespace so we can write @@ -16,4 +26,4 @@ FBSD_1.1 { # # Please do NOT increment the version of this namespace. FBSDprivate_1.0 { -} FBSD_1.1; +} FBSD_1.2; -- cgit v1.1 From 29a76c7eb2b81324d3a4f6987a1aaaa32f8732ab Mon Sep 17 00:00:00 2001 From: alc Date: Mon, 2 Nov 2009 18:44:01 +0000 Subject: MFC r197524 Make malloc(3) superpage aware. --- lib/libc/stdlib/malloc.3 | 6 ++++-- lib/libc/stdlib/malloc.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdlib/malloc.3 b/lib/libc/stdlib/malloc.3 index a621a24..308ba7b 100644 --- a/lib/libc/stdlib/malloc.3 +++ b/lib/libc/stdlib/malloc.3 @@ -32,7 +32,7 @@ .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd August 26, 2008 +.Dd September 26, 2009 .Dt MALLOC 3 .Os .Sh NAME @@ -245,7 +245,8 @@ will be initialized to 0x5a. This is intended for debugging and will impact performance negatively. .It K Double/halve the virtual memory chunk size. -The default chunk size is 1 MB. +The default chunk size is the maximum of 1 MB and the largest +page size that is less than or equal to 4 MB. .It M Use .Xr mmap 2 @@ -561,6 +562,7 @@ _malloc_options = "X"; .Xr alloca 3 , .Xr atexit 3 , .Xr getpagesize 3 , +.Xr getpagesizes 3 , .Xr memory 3 , .Xr posix_memalign 3 .Sh STANDARDS diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index 8b13d12..80e26e6 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -4795,6 +4795,21 @@ malloc_init_hard(void) } } + /* + * Increase the chunk size to the largest page size that is greater + * than the default chunk size and less than or equal to 4MB. + */ + { + size_t pagesizes[MAXPAGESIZES]; + int k, nsizes; + + nsizes = getpagesizes(pagesizes, MAXPAGESIZES); + for (k = 0; k < nsizes; k++) + if (pagesizes[k] <= (1LU << 22)) + while ((1LU << opt_chunk_2pow) < pagesizes[k]) + opt_chunk_2pow++; + } + for (i = 0; i < 3; i++) { unsigned j; -- cgit v1.1 From 4d34fdb382f7bfd134efd42ee3fd2344edd5391f Mon Sep 17 00:00:00 2001 From: brueffer Date: Wed, 4 Nov 2009 13:40:04 +0000 Subject: MFC: r198542 Initialize f_rabuf in the raw device case. A subsequent close() later on would try to free it, leading to a crash. --- lib/libstand/open.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/libstand/open.c b/lib/libstand/open.c index acbda1c..49dc660 100644 --- a/lib/libstand/open.c +++ b/lib/libstand/open.c @@ -113,6 +113,7 @@ open(const char *fname, int mode) /* see if we opened a raw device; otherwise, 'file' is the file name. 
*/ if (file == (char *)0 || *file == '\0') { f->f_flags |= F_RAW; + f->f_rabuf = NULL; return (fd); } -- cgit v1.1 From 42a4c580efb286c53601701f114a993d65880356 Mon Sep 17 00:00:00 2001 From: thompsa Date: Wed, 11 Nov 2009 01:27:58 +0000 Subject: MFC r199055 - fix refcounting error during data transfer - fix a memory leak on the USB backend - fix invalid pointer computations (in one case memory outside the allocated area was written in LibUSB v1.0) - make sure memory is always initialised, also in failing cases - add missing functions from v1.0.4 PR: usb/140325 --- lib/libusb/libusb.h | 37 +++++++--- lib/libusb/libusb10.c | 26 ++++++- lib/libusb/libusb10_desc.c | 34 ++++++--- lib/libusb/libusb10_io.c | 172 +++++++++++++++++++++++++++++++++++++++++-- lib/libusb/libusb20.c | 8 +- lib/libusb/libusb20_desc.c | 3 + lib/libusb/libusb20_ugen20.c | 6 ++ 7 files changed, 251 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/libusb/libusb.h b/lib/libusb/libusb.h index f65b57a..bdf2812 100644 --- a/lib/libusb/libusb.h +++ b/lib/libusb/libusb.h @@ -271,9 +271,11 @@ typedef struct libusb_control_setup { uint16_t wLength; } libusb_control_setup; +#define LIBUSB_CONTROL_SETUP_SIZE 8 /* bytes */ + typedef struct libusb_iso_packet_descriptor { - unsigned int length; - unsigned int actual_length; + uint32_t length; + uint32_t actual_length; enum libusb_transfer_status status; } libusb_iso_packet_descriptor __aligned(sizeof(void *)); @@ -282,9 +284,9 @@ typedef void (*libusb_transfer_cb_fn) (struct libusb_transfer *transfer); typedef struct libusb_transfer { libusb_device_handle *dev_handle; uint8_t flags; - unsigned int endpoint; + uint32_t endpoint; uint8_t type; - unsigned int timeout; + uint32_t timeout; enum libusb_transfer_status status; int length; int actual_length; @@ -320,7 +322,7 @@ int libusb_get_configuration(libusb_device_handle * devh, int *config); int libusb_set_configuration(libusb_device_handle * devh, int configuration); int libusb_claim_interface(libusb_device_handle * devh, int interface_number); int libusb_release_interface(libusb_device_handle * devh, int interface_number); -int libusb_reset_device(libusb_device_handle * dev); +int libusb_reset_device(libusb_device_handle * devh); int libusb_kernel_driver_active(libusb_device_handle * devh, int interface); int libusb_detach_kernel_driver(libusb_device_handle * devh, int interface); int libusb_attach_kernel_driver(libusb_device_handle * devh, int interface); @@ -333,7 +335,8 @@ int libusb_get_active_config_descriptor(libusb_device * dev, struct libusb_confi int libusb_get_config_descriptor(libusb_device * dev, uint8_t config_index, struct libusb_config_descriptor **config); int libusb_get_config_descriptor_by_value(libusb_device * dev, uint8_t bConfigurationValue, struct libusb_config_descriptor **config); void libusb_free_config_descriptor(struct libusb_config_descriptor *config); -int libusb_get_string_descriptor_ascii(libusb_device_handle * dev, uint8_t desc_index, uint8_t *data, int length); +int libusb_get_string_descriptor_ascii(libusb_device_handle * devh, uint8_t desc_index, uint8_t *data, int length); +int libusb_get_descriptor(libusb_device_handle * devh, uint8_t desc_type, uint8_t desc_index, uint8_t *data, int length); /* Asynchronous device I/O */ @@ -341,7 +344,16 @@ struct libusb_transfer *libusb_alloc_transfer(int iso_packets); void libusb_free_transfer(struct libusb_transfer *transfer); int libusb_submit_transfer(struct libusb_transfer *transfer); int libusb_cancel_transfer(struct libusb_transfer 
*transfer); -uint8_t *libusb_get_iso_packet_buffer_simple(struct libusb_transfer *transfer, unsigned int packet); +uint8_t *libusb_get_iso_packet_buffer(struct libusb_transfer *transfer, uint32_t index); +uint8_t *libusb_get_iso_packet_buffer_simple(struct libusb_transfer *transfer, uint32_t index); +void libusb_set_iso_packet_lengths(struct libusb_transfer *transfer, uint32_t length); +uint8_t *libusb_control_transfer_get_data(struct libusb_transfer *transfer); +struct libusb_control_setup *libusb_control_transfer_get_setup(struct libusb_transfer *transfer); +void libusb_fill_control_setup(uint8_t *buf, uint8_t bmRequestType, uint8_t bRequest, uint16_t wValue, uint16_t wIndex, uint16_t wLength); +void libusb_fill_control_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t *buf, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout); +void libusb_fill_bulk_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, int length, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout); +void libusb_fill_interrupt_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, int length, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout); +void libusb_fill_iso_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, int length, int npacket, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout); /* Polling and timing */ @@ -362,9 +374,14 @@ struct libusb_pollfd **libusb_get_pollfds(libusb_context * ctx); /* Synchronous device I/O */ -int libusb_control_transfer(libusb_device_handle * devh, uint8_t bmRequestType, uint8_t bRequest, uint16_t wValue, uint16_t wIndex, uint8_t *data, uint16_t wLength, unsigned int timeout); -int libusb_bulk_transfer(libusb_device_handle *devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, unsigned int timeout); -int libusb_interrupt_transfer(libusb_device_handle *devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, unsigned int timeout); +int libusb_control_transfer(libusb_device_handle * devh, uint8_t bmRequestType, uint8_t bRequest, uint16_t wValue, uint16_t wIndex, uint8_t *data, uint16_t wLength, uint32_t timeout); +int libusb_bulk_transfer(libusb_device_handle * devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, uint32_t timeout); +int libusb_interrupt_transfer(libusb_device_handle * devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, uint32_t timeout); + +/* Byte-order */ + +uint16_t libusb_cpu_to_le16(uint16_t x); +uint16_t libusb_le16_to_cpu(uint16_t x); #if 0 { /* indent fix */ diff --git a/lib/libusb/libusb10.c b/lib/libusb/libusb10.c index fa9130d..55ee1c5 100644 --- a/lib/libusb/libusb10.c +++ b/lib/libusb/libusb10.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "libusb20.h" #include "libusb20_desc.h" @@ -185,8 +186,6 @@ libusb_get_device_list(libusb_context *ctx, libusb_device ***list) /* create libusb v1.0 compliant devices */ i = 0; while ((pdev = libusb20_be_device_foreach(usb_backend, NULL))) { - /* get device into libUSB v1.0 list */ - libusb20_be_dequeue_device(usb_backend, pdev); dev = malloc(sizeof(*dev)); if (dev == NULL) { @@ -199,6 +198,10 @@ libusb_get_device_list(libusb_context *ctx, libusb_device ***list) libusb20_be_free(usb_backend); return (LIBUSB_ERROR_NO_MEM); } + + /* get device into libUSB v1.0 list */ + libusb20_be_dequeue_device(usb_backend, 
pdev); + memset(dev, 0, sizeof(*dev)); /* init transfer queues */ @@ -416,6 +419,8 @@ libusb_close(struct libusb20_device *pdev) libusb10_remove_pollfd(ctx, &dev->dev_poll); libusb20_dev_close(pdev); + + /* unref will free the "pdev" when the refcount reaches zero */ libusb_unref_device(dev); /* make sure our event loop detects the closed device */ @@ -1195,7 +1200,7 @@ libusb_submit_transfer(struct libusb_transfer *uxfer) struct libusb20_transfer *pxfer1; struct libusb_super_transfer *sxfer; struct libusb_device *dev; - unsigned int endpoint; + uint32_t endpoint; int err; if (uxfer == NULL) @@ -1252,7 +1257,7 @@ libusb_cancel_transfer(struct libusb_transfer *uxfer) struct libusb20_transfer *pxfer1; struct libusb_super_transfer *sxfer; struct libusb_device *dev; - unsigned int endpoint; + uint32_t endpoint; if (uxfer == NULL) return (LIBUSB_ERROR_INVALID_PARAM); @@ -1312,3 +1317,16 @@ libusb10_cancel_all_transfer(libusb_device *dev) { /* TODO */ } + +uint16_t +libusb_cpu_to_le16(uint16_t x) +{ + return (htole16(x)); +} + +uint16_t +libusb_le16_to_cpu(uint16_t x) +{ + return (le16toh(x)); +} + diff --git a/lib/libusb/libusb10_desc.c b/lib/libusb/libusb10_desc.c index c43443a..0215bce 100644 --- a/lib/libusb/libusb10_desc.c +++ b/lib/libusb/libusb10_desc.c @@ -35,6 +35,8 @@ #include "libusb.h" #include "libusb10.h" +#define N_ALIGN(n) (-((-(n)) & (-8UL))) + /* USB descriptors */ int @@ -114,17 +116,17 @@ libusb_get_config_descriptor(libusb_device *dev, uint8_t config_index, nalt = nif = pconf->num_interface; nep = 0; - nextra = pconf->extra.len; + nextra = N_ALIGN(pconf->extra.len); for (i = 0; i < nif; i++) { pinf = pconf->interface + i; - nextra += pinf->extra.len; + nextra += N_ALIGN(pinf->extra.len); nep += pinf->num_endpoints; k = pinf->num_endpoints; pend = pinf->endpoints; while (k--) { - nextra += pend->extra.len; + nextra += N_ALIGN(pend->extra.len); pend++; } @@ -132,12 +134,12 @@ libusb_get_config_descriptor(libusb_device *dev, uint8_t config_index, nalt += pinf->num_altsetting; pinf = pinf->altsetting; while (j--) { - nextra += pinf->extra.len; + nextra += N_ALIGN(pinf->extra.len); nep += pinf->num_endpoints; k = pinf->num_endpoints; pend = pinf->endpoints; while (k--) { - nextra += pend->extra.len; + nextra += N_ALIGN(pend->extra.len); pend++; } pinf++; @@ -150,17 +152,18 @@ libusb_get_config_descriptor(libusb_device *dev, uint8_t config_index, (nalt * sizeof(libusb_interface_descriptor)) + (nep * sizeof(libusb_endpoint_descriptor)); + nextra = N_ALIGN(nextra); + pconfd = malloc(nextra); if (pconfd == NULL) { free(pconf); return (LIBUSB_ERROR_NO_MEM); } - /* make sure memory is clean */ + /* make sure memory is initialised */ memset(pconfd, 0, nextra); - pconfd->interface = (libusb_interface *) (pconfd + - sizeof(libusb_config_descriptor)); + pconfd->interface = (libusb_interface *) (pconfd + 1); ifd = (libusb_interface_descriptor *) (pconfd->interface + nif); endd = (libusb_endpoint_descriptor *) (ifd + nalt); @@ -181,7 +184,7 @@ libusb_get_config_descriptor(libusb_device *dev, uint8_t config_index, pconfd->extra_length = pconf->extra.len; pconfd->extra = pextra; memcpy(pextra, pconf->extra.ptr, pconfd->extra_length); - pextra += pconfd->extra_length; + pextra += N_ALIGN(pconfd->extra_length); } /* setup all interface and endpoint pointers */ @@ -221,7 +224,7 @@ libusb_get_config_descriptor(libusb_device *dev, uint8_t config_index, ifd->extra_length = pinf->extra.len; ifd->extra = pextra; memcpy(pextra, pinf->extra.ptr, pinf->extra.len); - pextra += pinf->extra.len; + pextra += 
N_ALIGN(pinf->extra.len); } for (k = 0; k < pinf->num_endpoints; k++) { pend = &pinf->endpoints[k]; @@ -238,7 +241,7 @@ libusb_get_config_descriptor(libusb_device *dev, uint8_t config_index, endd->extra_length = pend->extra.len; endd->extra = pextra; memcpy(pextra, pend->extra.ptr, pend->extra.len); - pextra += pend->extra.len; + pextra += N_ALIGN(pend->extra.len); } } } @@ -304,3 +307,12 @@ libusb_get_string_descriptor_ascii(libusb_device_handle *pdev, return (LIBUSB_ERROR_OTHER); } + +int +libusb_get_descriptor(libusb_device_handle * devh, uint8_t desc_type, + uint8_t desc_index, uint8_t *data, int length) +{ + return (libusb_control_transfer(devh, LIBUSB_ENDPOINT_IN, + LIBUSB_REQUEST_GET_DESCRIPTOR, (desc_type << 8) | desc_index, 0, data, + length, 1000)); +} diff --git a/lib/libusb/libusb10_io.c b/lib/libusb/libusb10_io.c index a5bb85f..3d6daef 100644 --- a/lib/libusb/libusb10_io.c +++ b/lib/libusb/libusb10_io.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "libusb20.h" #include "libusb20_desc.h" @@ -148,19 +149,19 @@ libusb10_handle_events_sub(struct libusb_context *ctx, struct timeval *tv) goto do_done; } for (i = 0; i != nfds; i++) { - if (fds[i].revents == 0) - continue; if (ppdev[i] != NULL) { dev = libusb_get_device(ppdev[i]); - err = libusb20_dev_process(ppdev[i]); + if (fds[i].revents == 0) + err = 0; /* nothing to do */ + else + err = libusb20_dev_process(ppdev[i]); + if (err) { /* cancel all transfers - device is gone */ libusb10_cancel_all_transfer(dev); - /* - * make sure we don't go into an infinite - * loop - */ + + /* remove USB device from polling loop */ libusb10_remove_pollfd(dev->ctx, &dev->dev_poll); } CTX_UNLOCK(ctx); @@ -573,3 +574,160 @@ libusb_interrupt_transfer(libusb_device_handle *devh, DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_interrupt_transfer leave"); return (ret); } + +uint8_t * +libusb_get_iso_packet_buffer(struct libusb_transfer *transfer, uint32_t index) +{ + uint8_t *ptr; + uint32_t n; + + if (transfer->num_iso_packets < 0) + return (NULL); + + if (index >= (uint32_t)transfer->num_iso_packets) + return (NULL); + + ptr = transfer->buffer; + if (ptr == NULL) + return (NULL); + + for (n = 0; n != index; n++) { + ptr += transfer->iso_packet_desc[n].length; + } + return (ptr); +} + +uint8_t * +libusb_get_iso_packet_buffer_simple(struct libusb_transfer *transfer, uint32_t index) +{ + uint8_t *ptr; + + if (transfer->num_iso_packets < 0) + return (NULL); + + if (index >= (uint32_t)transfer->num_iso_packets) + return (NULL); + + ptr = transfer->buffer; + if (ptr == NULL) + return (NULL); + + ptr += transfer->iso_packet_desc[0].length * index; + + return (ptr); +} + +void +libusb_set_iso_packet_lengths(struct libusb_transfer *transfer, uint32_t length) +{ + int n; + + if (transfer->num_iso_packets < 0) + return; + + for (n = 0; n != transfer->num_iso_packets; n++) + transfer->iso_packet_desc[n].length = length; +} + +uint8_t * +libusb_control_transfer_get_data(struct libusb_transfer *transfer) +{ + if (transfer->buffer == NULL) + return (NULL); + + return (transfer->buffer + LIBUSB_CONTROL_SETUP_SIZE); +} + +struct libusb_control_setup * +libusb_control_transfer_get_setup(struct libusb_transfer *transfer) +{ + return ((struct libusb_control_setup *)transfer->buffer); +} + +void +libusb_fill_control_setup(uint8_t *buf, uint8_t bmRequestType, + uint8_t bRequest, uint16_t wValue, + uint16_t wIndex, uint16_t wLength) +{ + struct libusb_control_setup *req = (struct libusb_control_setup *)buf; + + /* The alignment is OK for all fields below. 
*/ + req->bmRequestType = bmRequestType; + req->bRequest = bRequest; + req->wValue = htole16(wValue); + req->wIndex = htole16(wIndex); + req->wLength = htole16(wLength); +} + +void +libusb_fill_control_transfer(struct libusb_transfer *transfer, + libusb_device_handle *devh, uint8_t *buf, + libusb_transfer_cb_fn callback, void *user_data, + uint32_t timeout) +{ + struct libusb_control_setup *setup = (struct libusb_control_setup *)buf; + + transfer->dev_handle = devh; + transfer->endpoint = 0; + transfer->type = LIBUSB_TRANSFER_TYPE_CONTROL; + transfer->timeout = timeout; + transfer->buffer = buf; + if (setup != NULL) + transfer->length = LIBUSB_CONTROL_SETUP_SIZE + + le16toh(setup->wLength); + else + transfer->length = 0; + transfer->user_data = user_data; + transfer->callback = callback; + +} + +void +libusb_fill_bulk_transfer(struct libusb_transfer *transfer, + libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, + int length, libusb_transfer_cb_fn callback, void *user_data, + uint32_t timeout) +{ + transfer->dev_handle = devh; + transfer->endpoint = endpoint; + transfer->type = LIBUSB_TRANSFER_TYPE_BULK; + transfer->timeout = timeout; + transfer->buffer = buf; + transfer->length = length; + transfer->user_data = user_data; + transfer->callback = callback; +} + +void +libusb_fill_interrupt_transfer(struct libusb_transfer *transfer, + libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, + int length, libusb_transfer_cb_fn callback, void *user_data, + uint32_t timeout) +{ + transfer->dev_handle = devh; + transfer->endpoint = endpoint; + transfer->type = LIBUSB_TRANSFER_TYPE_INTERRUPT; + transfer->timeout = timeout; + transfer->buffer = buf; + transfer->length = length; + transfer->user_data = user_data; + transfer->callback = callback; +} + +void +libusb_fill_iso_transfer(struct libusb_transfer *transfer, + libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, + int length, int npacket, libusb_transfer_cb_fn callback, + void *user_data, uint32_t timeout) +{ + transfer->dev_handle = devh; + transfer->endpoint = endpoint; + transfer->type = LIBUSB_TRANSFER_TYPE_ISOCHRONOUS; + transfer->timeout = timeout; + transfer->buffer = buf; + transfer->length = length; + transfer->num_iso_packets = npacket; + transfer->user_data = user_data; + transfer->callback = callback; +} + diff --git a/lib/libusb/libusb20.c b/lib/libusb/libusb20.c index 6e0ec47..1a33805 100644 --- a/lib/libusb/libusb20.c +++ b/lib/libusb/libusb20.c @@ -630,6 +630,9 @@ libusb20_dev_req_string_sync(struct libusb20_device *pdev, struct LIBUSB20_CONTROL_SETUP_DECODED req; int error; + /* make sure memory is initialised */ + memset(ptr, 0, len); + if (len < 4) { /* invalid length */ return (LIBUSB20_ERROR_INVALID_PARAM); @@ -1093,7 +1096,8 @@ libusb20_be_free(struct libusb20_backend *pbe) if (pbe->methods->exit_backend) { pbe->methods->exit_backend(pbe); } - return; + /* free backend */ + free(pbe); } void @@ -1101,7 +1105,6 @@ libusb20_be_enqueue_device(struct libusb20_backend *pbe, struct libusb20_device { pdev->beMethods = pbe->methods; /* copy backend methods */ TAILQ_INSERT_TAIL(&(pbe->usb_devs), pdev, dev_entry); - return; } void @@ -1109,5 +1112,4 @@ libusb20_be_dequeue_device(struct libusb20_backend *pbe, struct libusb20_device *pdev) { TAILQ_REMOVE(&(pbe->usb_devs), pdev, dev_entry); - return; } diff --git a/lib/libusb/libusb20_desc.c b/lib/libusb/libusb20_desc.c index e0d2c54..5206cf4 100644 --- a/lib/libusb/libusb20_desc.c +++ b/lib/libusb/libusb20_desc.c @@ -118,6 +118,9 @@ 
libusb20_parse_config_desc(const void *config_desc) if (lub_config == NULL) { return (NULL); /* out of memory */ } + /* make sure memory is initialised */ + memset(lub_config, 0, size); + lub_interface = (void *)(lub_config + 1); lub_alt_interface = (void *)(lub_interface + niface_no_alt); lub_endpoint = (void *)(lub_interface + niface); diff --git a/lib/libusb/libusb20_ugen20.c b/lib/libusb/libusb20_ugen20.c index f9f3689..efcca63 100644 --- a/lib/libusb/libusb20_ugen20.c +++ b/lib/libusb/libusb20_ugen20.c @@ -449,6 +449,8 @@ ugen20_get_config_desc_full(struct libusb20_device *pdev, uint16_t len; int error; + /* make sure memory is initialised */ + memset(&cdesc, 0, sizeof(cdesc)); memset(&gen_desc, 0, sizeof(gen_desc)); gen_desc.ugd_data = &cdesc; @@ -468,6 +470,10 @@ ugen20_get_config_desc_full(struct libusb20_device *pdev, if (!ptr) { return (LIBUSB20_ERROR_NO_MEM); } + + /* make sure memory is initialised */ + memset(ptr, 0, len); + gen_desc.ugd_data = ptr; gen_desc.ugd_maxlen = len; -- cgit v1.1 From bdd190d3246e1347a952c97b02eb18cd69e8e0a7 Mon Sep 17 00:00:00 2001 From: ume Date: Mon, 16 Nov 2009 03:52:18 +0000 Subject: MFC r199080, r199081, r199082: Add ja_JP.UTF-8 and ja_JP.eucJP catalogs. --- lib/libc/nls/Makefile.inc | 2 + lib/libc/nls/ja_JP.UTF-8.msg | 249 +++++++++++++++++++++++++++++++++++++++++++ lib/libc/nls/ja_JP.eucJP.msg | 249 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 500 insertions(+) create mode 100644 lib/libc/nls/ja_JP.UTF-8.msg create mode 100644 lib/libc/nls/ja_JP.eucJP.msg (limited to 'lib') diff --git a/lib/libc/nls/Makefile.inc b/lib/libc/nls/Makefile.inc index 9e19e85..8e46470 100644 --- a/lib/libc/nls/Makefile.inc +++ b/lib/libc/nls/Makefile.inc @@ -22,6 +22,8 @@ NLS+= fi_FI.ISO8859-1 NLS+= fr_FR.ISO8859-1 NLS+= hu_HU.ISO8859-2 NLS+= it_IT.ISO8859-15 +NLS+= ja_JP.UTF-8 +NLS+= ja_JP.eucJP NLS+= ko_KR.UTF-8 NLS+= ko_KR.eucKR NLS+= mn_MN.UTF-8 diff --git a/lib/libc/nls/ja_JP.UTF-8.msg b/lib/libc/nls/ja_JP.UTF-8.msg new file mode 100644 index 0000000..9ee98d1 --- /dev/null +++ b/lib/libc/nls/ja_JP.UTF-8.msg @@ -0,0 +1,249 @@ +$ $FreeBSD$ +$ +$ Message catalog for ja_JP.UTF-8 locale +$ +$ strerror() support catalog +$ +$set 1 +$ EPERM +1 許å¯ã•ã‚Œã¦ã„ãªã„æ“作ã§ã™ +$ ENOENT +2 ãã®ã‚ˆã†ãªãƒ•ã‚¡ã‚¤ãƒ«ã¾ãŸã¯ãƒ‡ã‚£ãƒ¬ã‚¯ãƒˆãƒªã¯ã‚ã‚Šã¾ã›ã‚“ +$ ESRCH +3 ãã®ã‚ˆã†ãªãƒ—ロセスã¯ã‚ã‚Šã¾ã›ã‚“ +$ EINTR +4 システムコールãŒä¸­æ–­ã•ã‚Œã¾ã—㟠+$ EIO +5 入出力エラーã§ã™ +$ ENXIO +6 デãƒã‚¤ã‚¹ãŒæº–å‚™ã•ã‚Œã¦ã„ã¾ã›ã‚“ +$ E2BIG +7 引数ã®ãƒªã‚¹ãƒˆãŒé•·ã™ãŽã¾ã™ +$ ENOEXEC +8 無効ãªå®Ÿè¡Œå½¢å¼ã§ã™ +$ EBADF +9 無効ãªãƒ•ã‚¡ã‚¤ãƒ«è¨˜è¿°å­ã§ã™ +$ ECHILD +10 å­ãƒ—ロセスãŒã‚ã‚Šã¾ã›ã‚“ +$ EDEADLK +11 リソースデッドロックを回é¿ã—ã¾ã—㟠+$ ENOMEM +12 メモリã®å‰²ã‚Šå½“ã¦ãŒã§ãã¾ã›ã‚“ +$ EACCES +13 パーミッションãŒæ‹’絶ã•ã‚Œã¾ã—㟠+$ EFAULT +14 無効ãªã‚¢ãƒ‰ãƒ¬ã‚¹ã§ã™ +$ ENOTBLK +15 ブロックデãƒã‚¤ã‚¹ãŒè¦æ±‚ã•ã‚Œã¦ã„ã¾ã™ +$ EBUSY +16 デãƒã‚¤ã‚¹ãŒãƒ“ジー状態ã§ã™ +$ EEXIST +17 ファイルãŒå­˜åœ¨ã—ã¾ã™ +$ EXDEV +18 デãƒã‚¤ã‚¹ã‚’ã¾ãŸãリンクã§ã™ +$ ENODEV +19 デãƒã‚¤ã‚¹ãŒå¯¾å¿œã—ã¦ãªã„æ“作ã§ã™ +$ ENOTDIR +20 ディレクトリã§ã¯ã‚ã‚Šã¾ã›ã‚“ +$ EISDIR +21 ディレクトリã§ã™ +$ EINVAL +22 無効ãªå¼•æ•°ã§ã™ +$ ENFILE +23 システム内ã§ã‚ªãƒ¼ãƒ—ンã•ã‚Œã¦ã„るファイルãŒå¤šã™ãŽã¾ã™ +$ EMFILE +24 オープンã—ã¦ã„るファイルãŒå¤šã™ãŽã¾ã™ +$ ENOTTY +25 デãƒã‚¤ã‚¹ãŒå¯¾å¿œã—ã¦ã„ãªã„ ioctl ã§ã™ +$ ETXTBSY +26 テキストファイルãŒãƒ“ジー状態ã§ã™ +$ EFBIG +27 ファイルãŒå¤§ãã™ãŽã¾ã™ +$ ENOSPC +28 デãƒã‚¤ã‚¹ã®ç©ºã領域ä¸è¶³ã§ã™ +$ ESPIPE +29 無効ãªã‚·ãƒ¼ã‚¯ã§ã™ +$ EROFS +30 読ã¿è¾¼ã¿å°‚用ファイルシステムã§ã™ +$ EMLINK +31 リンク数ãŒå¤šã™ãŽã¾ã™ +$ EPIPE +32 パイプãŒç ´å£Šã•ã‚Œã¦ã¾ã—㟠+$ EDOM +33 数値引数ãŒç¯„囲外ã§ã™ +$ ERANGE +34 çµæžœãŒå¤§ãéŽãŽã¾ã™ +$ EAGAIN, EWOULDBLOCK +35 
リソースãŒä¸€æ™‚çš„ã«åˆ©ç”¨ã§ãã¾ã›ã‚“ +$ EINPROGRESS +36 æ“作ãŒç¾åœ¨é€²è¡Œä¸­ã§ã™ +$ EALREADY +37 æ“作ã¯æ—¢ã«é€²è¡Œä¸­ã§ã™ +$ ENOTSOCK +38 ソケットã§ãªã„ã‚‚ã®ã«ã¤ã„ã¦ã‚½ã‚±ãƒƒãƒˆæ“作を行ã„ã¾ã—㟠+$ EDESTADDRREQ +39 宛先アドレスãŒè¦æ±‚ã•ã‚Œã¦ã„ã¾ã™ +$ EMSGSIZE +40 メッセージãŒé•·ã™ãŽã¾ã™ +$ EPROTOTYPE +41 ソケットãŒå¯¾å¿œã—ã¦ã„ãªã„プロトコルタイプã§ã™ +$ ENOPROTOOPT +42 利用ã§ããªã„プロトコルã§ã™ +$ EPROTONOSUPPORT +43 対応ã—ã¦ã„ãªã„プロトコルã§ã™ +$ ESOCKTNOSUPPORT +44 対応ã—ã¦ã„ãªã„ソケットタイプã§ã™ +$ EOPNOTSUPP +45 対応ã—ã¦ã„ãªã„æ“作ã§ã™ +$ EPFNOSUPPORT +46 対応ã—ã¦ã„ãªã„プロトコルファミリã§ã™ +$ EAFNOSUPPORT +47 プロトコルファミリãŒå¯¾å¿œã—ã¦ã„ãªã„アドレスファミリãŒæŒ‡å®šã•ã‚Œã¾ã—㟠+$ EADDRINUSE +48 アドレスãŒæ—¢ã«ä½¿ç”¨ä¸­ã§ã™ +$ EADDRNOTAVAIL +49 è¦æ±‚ã•ã‚ŒãŸã‚¢ãƒ‰ãƒ¬ã‚¹ã‚’割り当ã¦ã§ãã¾ã›ã‚“ +$ ENETDOWN +50 ãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ãŒãƒ€ã‚¦ãƒ³ã—ã¦ã„ã¾ã™ +$ ENETUNREACH +51 ãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ã«åˆ°é”ã§ãã¾ã›ã‚“ +$ ENETRESET +52 リセットã«ã‚ˆã‚Šãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ã®æŽ¥ç¶šãŒå¤±ã‚ã‚Œã¾ã—㟠+$ ECONNABORTED +53 ソフトウェアã«ã‚ˆã£ã¦æŽ¥ç¶šãŒåˆ‡æ–­ã•ã‚Œã¾ã—㟠+$ ECONNRESET +54 接続ãŒé€šä¿¡ç›¸æ‰‹ã«ã‚ˆã£ã¦ãƒªã‚»ãƒƒãƒˆã•ã‚Œã¾ã—㟠+$ ENOBUFS +55 ãƒãƒƒãƒ•ã‚¡ã®å®¹é‡ä¸è¶³ã§ã™ +$ EISCONN +56 ソケットã¯æ—¢ã«æŽ¥ç¶šã•ã‚Œã¦ã„ã¾ã™ +$ ENOTCONN +57 ソケットã¯æŽ¥ç¶šã•ã‚Œã¦ã„ã¾ã›ã‚“ +$ ESHUTDOWN +58 ソケットã®ã‚·ãƒ£ãƒƒãƒˆãƒ€ã‚¦ãƒ³ã®å¾Œã§é€ä¿¡ãŒã§ãã¾ã›ã‚“ +$ ETOOMANYREFS +59 処ç†é™ç•Œã‚’超ãˆã‚‹å¤šé‡å‚ç…§ã§ã™ +$ ETIMEDOUT +60 æ“作ãŒã‚¿ã‚¤ãƒ ã‚¢ã‚¦ãƒˆã—ã¾ã—㟠+$ ECONNREFUSED +61 接続ãŒæ‹’絶ã•ã‚Œã¾ã—㟠+$ ELOOP +62 処ç†é™ç•Œã‚’超ãˆã‚‹ã‚·ãƒ³ãƒœãƒªãƒƒã‚¯ãƒªãƒ³ã‚¯ãƒ¬ãƒ™ãƒ«ã§ã™ +$ ENAMETOOLONG +63 ファイルåãŒé•·ã™ãŽã¾ã™ +$ EHOSTDOWN +64 ホストãŒãƒ€ã‚¦ãƒ³ã—ã¦ã„ã¾ã™ +$ EHOSTUNREACH +65 ホストã¸ã®çµŒè·¯ãŒã‚ã‚Šã¾ã›ã‚“ +$ ENOTEMPTY +66 ディレクトリãŒç©ºã§ã¯ã‚ã‚Šã¾ã›ã‚“ +$ EPROCLIM +67 プロセスãŒå¤šã™ãŽã¾ã™ +$ EUSERS +68 ユーザãŒå¤šã™ãŽã¾ã™ +$ EDQUOT +69 ディスククォータãŒè¶…éŽã—ã¾ã—㟠+$ ESTALE +70 失効ã—㟠NFS ファイルãƒãƒ³ãƒ‰ãƒ«ã§ã™ +$ EREMOTE +71 パス中ã®ãƒªãƒ¢ãƒ¼ãƒˆã®ãƒ¬ãƒ™ãƒ«ãŒå¤šã™ãŽã¾ã™ +$ EBADRPC +72 無効㪠RPC 構造体ã§ã™ +$ ERPCMISMATCH +73 RPC ãƒãƒ¼ã‚¸ãƒ§ãƒ³ãŒé–“é•ã£ã¦ã„ã¾ã™ +$ EPROGUNAVAIL +74 RPC プログラムãŒåˆ©ç”¨ã§ãã¾ã›ã‚“ +$ EPROGMISMATCH +75 プログラムã®ãƒãƒ¼ã‚¸ãƒ§ãƒ³ãŒåˆã£ã¦ã„ã¾ã›ã‚“ +$ EPROCUNAVAIL +76 プログラムã§ã¯åˆ©ç”¨ã§ããªã„ procedure ã§ã™ +$ ENOLCK +77 ロックãŒåˆ©ç”¨ã§ãã¾ã›ã‚“ +$ ENOSYS +78 関数ãŒå®Ÿè£…ã•ã‚Œã¦ã„ã¾ã›ã‚“ +$ EFTYPE +79 ファイルã®åž‹ã¾ãŸã¯å½¢å¼ãŒä¸é©åˆ‡ã§ã™ +$ EAUTH +80 èªè¨¼ã‚¨ãƒ©ãƒ¼ã§ã™ +$ ENEEDAUTH +81 èªè¨¼ç‰©ãŒå¿…è¦ã§ã™ +$ EIDRM +82 識別å­ã¯å‰Šé™¤ã•ã‚Œã¾ã—㟠+$ ENOMSG +83 è¦æ±‚ã•ã‚ŒãŸåž‹ã®ãƒ¡ãƒƒã‚»ãƒ¼ã‚¸ãŒã‚ã‚Šã¾ã›ã‚“ +$ EOVERFLOW +84 データ型ã«æ ¼ç´ã™ã‚‹ã«ã¯å¤§ãã™ãŽã‚‹å€¤ã§ã™ +$ ECANCELED +85 処ç†ãŒã‚­ãƒ£ãƒ³ã‚»ãƒ«ã•ã‚Œã¾ã—㟠+$ EILSEQ +86 ä¸æ­£ãªãƒã‚¤ãƒˆåˆ—ã§ã™ +$ ENOATTR +87 ãã®ã‚ˆã†ãªå±žæ€§ã¯ã‚ã‚Šã¾ã›ã‚“ +$ EDOOFUS +88 プログラミングエラーã§ã™ +$ +$ strsignal() support catalog +$ +$set 2 +$ SIGHUP +1 ãƒãƒ³ã‚°ã‚¢ãƒƒãƒ— +$ SIGINT +2 割り込㿠+$ SIGQUIT +3 中断 +$ SIGILL +4 ä¸æ­£å‘½ä»¤ +$ SIGTRAP +5 トレース/BPT トラップ +$ SIGABRT +6 アボートトラップ +$ SIGEMT +7 EMT トラップ +$ SIGFPE +8 浮動å°æ•°ç‚¹ä¾‹å¤– +$ SIGKILL +9 Kill ã•ã‚ŒãŸ +$ SIGBUS +10 ãƒã‚¹ã‚¨ãƒ©ãƒ¼ +$ SIGSEGV +11 セグメンテーションé•å +$ SIGSYS +12 存在ã—ãªã„システムコール +$ SIGPIPE +13 パイプ破壊 +$ SIGALRM +14 アラームクロック +$ SIGTERM +15 終了 +$ SIGURG +16 ç·Šæ€¥å…¥å‡ºåŠ›çŠ¶æ³ +$ SIGSTOP +17 一時åœæ­¢ (シグナル) +$ SIGTSTP +18 一時åœæ­¢ +$ SIGCONT +19 継続 +$ SIGCHLD +20 å­ãƒ—ロセスã®çµ‚了 +$ SIGTTIN +21 一時åœæ­¢ (tty 入力) +$ SIGTTOU +22 一時åœæ­¢ (tty 出力) +$ SIGIO +23 入出力å¯èƒ½ +$ SIGXCPU +24 CPU 時間ã®åˆ¶é™è¶…éŽ +$ SIGXFSZ +25 ファイルサイズã®åˆ¶é™è¶…éŽ +$ SIGVTALRM +26 仮想タイマã®æœŸé™è¶…éŽ +$ SIGPROF +27 プロファイルタイマã®æœŸé™è¶…éŽ +$ SIGWINCH +28 ウィンドウサイズã®å¤‰åŒ– +$ SIGINFO +29 情報è¦æ±‚ +$ SIGUSR1 +30 ユーザ定義シグナル 1 +$ SIGUSR2 +31 ユーザ定義シグナル 2 diff --git a/lib/libc/nls/ja_JP.eucJP.msg b/lib/libc/nls/ja_JP.eucJP.msg new file mode 100644 index 
0000000..cba0157 --- /dev/null +++ b/lib/libc/nls/ja_JP.eucJP.msg @@ -0,0 +1,249 @@ +$ $FreeBSD$ +$ +$ Message catalog for ja_JP.eucJP locale +$ +$ strerror() support catalog +$ +$set 1 +$ EPERM +1 µö²Ä¤µ¤ì¤Æ¤¤¤Ê¤¤Áàºî¤Ç¤¹ +$ ENOENT +2 ¤½¤Î¤è¤¦¤Ê¥Õ¥¡¥¤¥ë¤Þ¤¿¤Ï¥Ç¥£¥ì¥¯¥È¥ê¤Ï¤¢¤ê¤Þ¤»¤ó +$ ESRCH +3 ¤½¤Î¤è¤¦¤Ê¥×¥í¥»¥¹¤Ï¤¢¤ê¤Þ¤»¤ó +$ EINTR +4 ¥·¥¹¥Æ¥à¥³¡¼¥ë¤¬ÃæÃǤµ¤ì¤Þ¤·¤¿ +$ EIO +5 Æþ½ÐÎÏ¥¨¥é¡¼¤Ç¤¹ +$ ENXIO +6 ¥Ç¥Ð¥¤¥¹¤¬½àÈ÷¤µ¤ì¤Æ¤¤¤Þ¤»¤ó +$ E2BIG +7 °ú¿ô¤Î¥ê¥¹¥È¤¬Ä¹¤¹¤®¤Þ¤¹ +$ ENOEXEC +8 ̵¸ú¤Ê¼Â¹Ô·Á¼°¤Ç¤¹ +$ EBADF +9 ̵¸ú¤Ê¥Õ¥¡¥¤¥ëµ­½Ò»Ò¤Ç¤¹ +$ ECHILD +10 »Ò¥×¥í¥»¥¹¤¬¤¢¤ê¤Þ¤»¤ó +$ EDEADLK +11 ¥ê¥½¡¼¥¹¥Ç¥Ã¥É¥í¥Ã¥¯¤ò²óÈò¤·¤Þ¤·¤¿ +$ ENOMEM +12 ¥á¥â¥ê¤Î³ä¤êÅö¤Æ¤¬¤Ç¤­¤Þ¤»¤ó +$ EACCES +13 ¥Ñ¡¼¥ß¥Ã¥·¥ç¥ó¤¬µñÀ䤵¤ì¤Þ¤·¤¿ +$ EFAULT +14 ̵¸ú¤Ê¥¢¥É¥ì¥¹¤Ç¤¹ +$ ENOTBLK +15 ¥Ö¥í¥Ã¥¯¥Ç¥Ð¥¤¥¹¤¬Í׵ᤵ¤ì¤Æ¤¤¤Þ¤¹ +$ EBUSY +16 ¥Ç¥Ð¥¤¥¹¤¬¥Ó¥¸¡¼¾õÂ֤Ǥ¹ +$ EEXIST +17 ¥Õ¥¡¥¤¥ë¤¬Â¸ºß¤·¤Þ¤¹ +$ EXDEV +18 ¥Ç¥Ð¥¤¥¹¤ò¤Þ¤¿¤°¥ê¥ó¥¯¤Ç¤¹ +$ ENODEV +19 ¥Ç¥Ð¥¤¥¹¤¬Âбþ¤·¤Æ¤Ê¤¤Áàºî¤Ç¤¹ +$ ENOTDIR +20 ¥Ç¥£¥ì¥¯¥È¥ê¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó +$ EISDIR +21 ¥Ç¥£¥ì¥¯¥È¥ê¤Ç¤¹ +$ EINVAL +22 ̵¸ú¤Ê°ú¿ô¤Ç¤¹ +$ ENFILE +23 ¥·¥¹¥Æ¥àÆâ¤Ç¥ª¡¼¥×¥ó¤µ¤ì¤Æ¤¤¤ë¥Õ¥¡¥¤¥ë¤¬Â¿¤¹¤®¤Þ¤¹ +$ EMFILE +24 ¥ª¡¼¥×¥ó¤·¤Æ¤¤¤ë¥Õ¥¡¥¤¥ë¤¬Â¿¤¹¤®¤Þ¤¹ +$ ENOTTY +25 ¥Ç¥Ð¥¤¥¹¤¬Âбþ¤·¤Æ¤¤¤Ê¤¤ ioctl ¤Ç¤¹ +$ ETXTBSY +26 ¥Æ¥­¥¹¥È¥Õ¥¡¥¤¥ë¤¬¥Ó¥¸¡¼¾õÂ֤Ǥ¹ +$ EFBIG +27 ¥Õ¥¡¥¤¥ë¤¬Â礭¤¹¤®¤Þ¤¹ +$ ENOSPC +28 ¥Ç¥Ð¥¤¥¹¤Î¶õ¤­ÎΰèÉÔ­¤Ç¤¹ +$ ESPIPE +29 ̵¸ú¤Ê¥·¡¼¥¯¤Ç¤¹ +$ EROFS +30 Æɤ߹þ¤ßÀìÍÑ¥Õ¥¡¥¤¥ë¥·¥¹¥Æ¥à¤Ç¤¹ +$ EMLINK +31 ¥ê¥ó¥¯¿ô¤¬Â¿¤¹¤®¤Þ¤¹ +$ EPIPE +32 ¥Ñ¥¤¥×¤¬Ç˲õ¤µ¤ì¤Æ¤Þ¤·¤¿ +$ EDOM +33 ¿ôÃÍ°ú¿ô¤¬Èϰϳ°¤Ç¤¹ +$ ERANGE +34 ·ë²Ì¤¬Â礭²á¤®¤Þ¤¹ +$ EAGAIN, EWOULDBLOCK +35 ¥ê¥½¡¼¥¹¤¬°ì»þŪ¤ËÍøÍѤǤ­¤Þ¤»¤ó +$ EINPROGRESS +36 Áàºî¤¬¸½ºß¿Ê¹ÔÃæ¤Ç¤¹ +$ EALREADY +37 Áàºî¤Ï´û¤Ë¿Ê¹ÔÃæ¤Ç¤¹ +$ ENOTSOCK +38 ¥½¥±¥Ã¥È¤Ç¤Ê¤¤¤â¤Î¤Ë¤Ä¤¤¤Æ¥½¥±¥Ã¥ÈÁàºî¤ò¹Ô¤¤¤Þ¤·¤¿ +$ EDESTADDRREQ +39 °¸À襢¥É¥ì¥¹¤¬Í׵ᤵ¤ì¤Æ¤¤¤Þ¤¹ +$ EMSGSIZE +40 ¥á¥Ã¥»¡¼¥¸¤¬Ä¹¤¹¤®¤Þ¤¹ +$ EPROTOTYPE +41 ¥½¥±¥Ã¥È¤¬Âбþ¤·¤Æ¤¤¤Ê¤¤¥×¥í¥È¥³¥ë¥¿¥¤¥×¤Ç¤¹ +$ ENOPROTOOPT +42 ÍøÍѤǤ­¤Ê¤¤¥×¥í¥È¥³¥ë¤Ç¤¹ +$ EPROTONOSUPPORT +43 Âбþ¤·¤Æ¤¤¤Ê¤¤¥×¥í¥È¥³¥ë¤Ç¤¹ +$ ESOCKTNOSUPPORT +44 Âбþ¤·¤Æ¤¤¤Ê¤¤¥½¥±¥Ã¥È¥¿¥¤¥×¤Ç¤¹ +$ EOPNOTSUPP +45 Âбþ¤·¤Æ¤¤¤Ê¤¤Áàºî¤Ç¤¹ +$ EPFNOSUPPORT +46 Âбþ¤·¤Æ¤¤¤Ê¤¤¥×¥í¥È¥³¥ë¥Õ¥¡¥ß¥ê¤Ç¤¹ +$ EAFNOSUPPORT +47 ¥×¥í¥È¥³¥ë¥Õ¥¡¥ß¥ê¤¬Âбþ¤·¤Æ¤¤¤Ê¤¤¥¢¥É¥ì¥¹¥Õ¥¡¥ß¥ê¤¬»ØÄꤵ¤ì¤Þ¤·¤¿ +$ EADDRINUSE +48 ¥¢¥É¥ì¥¹¤¬´û¤Ë»ÈÍÑÃæ¤Ç¤¹ +$ EADDRNOTAVAIL +49 Í׵ᤵ¤ì¤¿¥¢¥É¥ì¥¹¤ò³ä¤êÅö¤Æ¤Ç¤­¤Þ¤»¤ó +$ ENETDOWN +50 ¥Í¥Ã¥È¥ï¡¼¥¯¤¬¥À¥¦¥ó¤·¤Æ¤¤¤Þ¤¹ +$ ENETUNREACH +51 ¥Í¥Ã¥È¥ï¡¼¥¯¤ËÅþã¤Ç¤­¤Þ¤»¤ó +$ ENETRESET +52 ¥ê¥»¥Ã¥È¤Ë¤è¤ê¥Í¥Ã¥È¥ï¡¼¥¯¤ÎÀܳ¤¬¼º¤ï¤ì¤Þ¤·¤¿ +$ ECONNABORTED +53 ¥½¥Õ¥È¥¦¥§¥¢¤Ë¤è¤Ã¤ÆÀܳ¤¬ÀÚÃǤµ¤ì¤Þ¤·¤¿ +$ ECONNRESET +54 Àܳ¤¬ÄÌ¿®Áê¼ê¤Ë¤è¤Ã¤Æ¥ê¥»¥Ã¥È¤µ¤ì¤Þ¤·¤¿ +$ ENOBUFS +55 ¥Ð¥Ã¥Õ¥¡¤ÎÍÆÎÌÉÔ­¤Ç¤¹ +$ EISCONN +56 ¥½¥±¥Ã¥È¤Ï´û¤ËÀܳ¤µ¤ì¤Æ¤¤¤Þ¤¹ +$ ENOTCONN +57 ¥½¥±¥Ã¥È¤ÏÀܳ¤µ¤ì¤Æ¤¤¤Þ¤»¤ó +$ ESHUTDOWN +58 ¥½¥±¥Ã¥È¤Î¥·¥ã¥Ã¥È¥À¥¦¥ó¤Î¸å¤ÇÁ÷¿®¤¬¤Ç¤­¤Þ¤»¤ó +$ ETOOMANYREFS +59 ½èÍý¸Â³¦¤òĶ¤¨¤ë¿½Å»²¾È¤Ç¤¹ +$ ETIMEDOUT +60 Áàºî¤¬¥¿¥¤¥à¥¢¥¦¥È¤·¤Þ¤·¤¿ +$ ECONNREFUSED +61 Àܳ¤¬µñÀ䤵¤ì¤Þ¤·¤¿ +$ ELOOP +62 ½èÍý¸Â³¦¤òĶ¤¨¤ë¥·¥ó¥Ü¥ê¥Ã¥¯¥ê¥ó¥¯¥ì¥Ù¥ë¤Ç¤¹ +$ ENAMETOOLONG +63 ¥Õ¥¡¥¤¥ë̾¤¬Ä¹¤¹¤®¤Þ¤¹ +$ EHOSTDOWN +64 ¥Û¥¹¥È¤¬¥À¥¦¥ó¤·¤Æ¤¤¤Þ¤¹ +$ EHOSTUNREACH +65 ¥Û¥¹¥È¤Ø¤Î·ÐÏ©¤¬¤¢¤ê¤Þ¤»¤ó +$ ENOTEMPTY +66 ¥Ç¥£¥ì¥¯¥È¥ê¤¬¶õ¤Ç¤Ï¤¢¤ê¤Þ¤»¤ó +$ EPROCLIM +67 ¥×¥í¥»¥¹¤¬Â¿¤¹¤®¤Þ¤¹ +$ EUSERS +68 ¥æ¡¼¥¶¤¬Â¿¤¹¤®¤Þ¤¹ +$ EDQUOT +69 ¥Ç¥£¥¹¥¯¥¯¥©¡¼¥¿¤¬Ä¶²á¤·¤Þ¤·¤¿ +$ ESTALE +70 ¼º¸ú¤·¤¿ NFS ¥Õ¥¡¥¤¥ë¥Ï¥ó¥É¥ë¤Ç¤¹ +$ EREMOTE +71 ¥Ñ¥¹Ãæ¤Î¥ê¥â¡¼¥È¤Î¥ì¥Ù¥ë¤¬Â¿¤¹¤®¤Þ¤¹ +$ EBADRPC +72 ̵¸ú¤Ê RPC ¹½Â¤ÂΤǤ¹ +$ ERPCMISMATCH +73 RPC ¥Ð¡¼¥¸¥ç¥ó¤¬´Ö°ã¤Ã¤Æ¤¤¤Þ¤¹ +$ EPROGUNAVAIL +74 RPC ¥×¥í¥°¥é¥à¤¬ÍøÍѤǤ­¤Þ¤»¤ó +$ EPROGMISMATCH +75 ¥×¥í¥°¥é¥à¤Î¥Ð¡¼¥¸¥ç¥ó¤¬¹ç¤Ã¤Æ¤¤¤Þ¤»¤ó 
+$ EPROCUNAVAIL +76 ¥×¥í¥°¥é¥à¤Ç¤ÏÍøÍѤǤ­¤Ê¤¤ procedure ¤Ç¤¹ +$ ENOLCK +77 ¥í¥Ã¥¯¤¬ÍøÍѤǤ­¤Þ¤»¤ó +$ ENOSYS +78 ´Ø¿ô¤¬¼ÂÁõ¤µ¤ì¤Æ¤¤¤Þ¤»¤ó +$ EFTYPE +79 ¥Õ¥¡¥¤¥ë¤Î·¿¤Þ¤¿¤Ï·Á¼°¤¬ÉÔŬÀڤǤ¹ +$ EAUTH +80 ǧ¾Ú¥¨¥é¡¼¤Ç¤¹ +$ ENEEDAUTH +81 ǧ¾Úʪ¤¬É¬ÍפǤ¹ +$ EIDRM +82 ¼±Ê̻ҤϺï½ü¤µ¤ì¤Þ¤·¤¿ +$ ENOMSG +83 Í׵ᤵ¤ì¤¿·¿¤Î¥á¥Ã¥»¡¼¥¸¤¬¤¢¤ê¤Þ¤»¤ó +$ EOVERFLOW +84 ¥Ç¡¼¥¿·¿¤Ë³ÊǼ¤¹¤ë¤Ë¤ÏÂ礭¤¹¤®¤ëÃͤǤ¹ +$ ECANCELED +85 ½èÍý¤¬¥­¥ã¥ó¥»¥ë¤µ¤ì¤Þ¤·¤¿ +$ EILSEQ +86 ÉÔÀµ¤Ê¥Ð¥¤¥ÈÎó¤Ç¤¹ +$ ENOATTR +87 ¤½¤Î¤è¤¦¤Ê°À­¤Ï¤¢¤ê¤Þ¤»¤ó +$ EDOOFUS +88 ¥×¥í¥°¥é¥ß¥ó¥°¥¨¥é¡¼¤Ç¤¹ +$ +$ strsignal() support catalog +$ +$set 2 +$ SIGHUP +1 ¥Ï¥ó¥°¥¢¥Ã¥× +$ SIGINT +2 ³ä¤ê¹þ¤ß +$ SIGQUIT +3 ÃæÃÇ +$ SIGILL +4 ÉÔÀµÌ¿Îá +$ SIGTRAP +5 ¥È¥ì¡¼¥¹/BPT ¥È¥é¥Ã¥× +$ SIGABRT +6 ¥¢¥Ü¡¼¥È¥È¥é¥Ã¥× +$ SIGEMT +7 EMT ¥È¥é¥Ã¥× +$ SIGFPE +8 ÉâÆ°¾®¿ôÅÀÎã³° +$ SIGKILL +9 Kill ¤µ¤ì¤¿ +$ SIGBUS +10 ¥Ð¥¹¥¨¥é¡¼ +$ SIGSEGV +11 ¥»¥°¥á¥ó¥Æ¡¼¥·¥ç¥ó°ãÈ¿ +$ SIGSYS +12 ¸ºß¤·¤Ê¤¤¥·¥¹¥Æ¥à¥³¡¼¥ë +$ SIGPIPE +13 ¥Ñ¥¤¥×Ç˲õ +$ SIGALRM +14 ¥¢¥é¡¼¥à¥¯¥í¥Ã¥¯ +$ SIGTERM +15 ½ªÎ» +$ SIGURG +16 ¶ÛµÞÆþ½ÐÎϾõ¶· +$ SIGSTOP +17 °ì»þÄä»ß (¥·¥°¥Ê¥ë) +$ SIGTSTP +18 °ì»þÄä»ß +$ SIGCONT +19 ·Ñ³ +$ SIGCHLD +20 »Ò¥×¥í¥»¥¹¤Î½ªÎ» +$ SIGTTIN +21 °ì»þÄä»ß (tty ÆþÎÏ) +$ SIGTTOU +22 °ì»þÄä»ß (tty ½ÐÎÏ) +$ SIGIO +23 Æþ½ÐÎϲÄǽ +$ SIGXCPU +24 CPU »þ´Ö¤ÎÀ©¸ÂĶ²á +$ SIGXFSZ +25 ¥Õ¥¡¥¤¥ë¥µ¥¤¥º¤ÎÀ©¸ÂĶ²á +$ SIGVTALRM +26 ²¾ÁÛ¥¿¥¤¥Þ¤Î´ü¸ÂĶ²á +$ SIGPROF +27 ¥×¥í¥Õ¥¡¥¤¥ë¥¿¥¤¥Þ¤Î´ü¸ÂĶ²á +$ SIGWINCH +28 ¥¦¥£¥ó¥É¥¦¥µ¥¤¥º¤ÎÊѲ½ +$ SIGINFO +29 ¾ðÊóÍ×µá +$ SIGUSR1 +30 ¥æ¡¼¥¶ÄêµÁ¥·¥°¥Ê¥ë 1 +$ SIGUSR2 +31 ¥æ¡¼¥¶ÄêµÁ¥·¥°¥Ê¥ë 2 -- cgit v1.1 From a638ddfac313cd2604b87003d8112542018502e2 Mon Sep 17 00:00:00 2001 From: brueffer Date: Mon, 16 Nov 2009 08:26:56 +0000 Subject: MFC: r199046 Fix a copy+paste error by checking the correct variable against MM_NULLACT. --- lib/libc/gen/fmtmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/gen/fmtmsg.c b/lib/libc/gen/fmtmsg.c index 6caabbb..e6b1b96 100644 --- a/lib/libc/gen/fmtmsg.c +++ b/lib/libc/gen/fmtmsg.c @@ -128,7 +128,7 @@ printfmt(char *msgverb, long class, const char *label, int sev, size += strlen(sevname); if (text != MM_NULLTXT) size += strlen(text); - if (text != MM_NULLACT) + if (act != MM_NULLACT) size += strlen(act); if (tag != MM_NULLTAG) size += strlen(tag); -- cgit v1.1 From e3c9717703321ed32dfe4b6db831f7ad0d719e81 Mon Sep 17 00:00:00 2001 From: jhb Date: Tue, 17 Nov 2009 16:50:57 +0000 Subject: MFC 198986: Fix a copy-paste bug when reading data from the last 3 (7 for PAE) bytes of a page mapped by a large page in the kernel. 
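In more detail: _kvm_pa2off() returns how many bytes are available in the dump at the translated physical address, and a data address in the last 3 (7 for PAE) bytes of a large page legitimately has fewer than sizeof(pde) bytes left in that page, so the copied check "s < sizeof pde" rejected addresses that were in fact present; only a zero return means the address is missing from the dump. In outline, the corrected test is (a sketch; the real hunks follow):

    s = _kvm_pa2off(kd, pde_pa, &ofs);  /* bytes available at pde_pa */
    if (s == 0) {                       /* zero: PA not in the dump */
            _kvm_err(kd, kd->program,
                "_kvm_vatop: 4MB page address not in dump");
            goto invalid;
    }
    *pa = ofs;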
--- lib/libkvm/kvm_i386.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/libkvm/kvm_i386.c b/lib/libkvm/kvm_i386.c index 303b4af..ac86ba6 100644 --- a/lib/libkvm/kvm_i386.c +++ b/lib/libkvm/kvm_i386.c @@ -295,9 +295,9 @@ _kvm_vatop(kvm_t *kd, u_long va, off_t *pa) #define PG_FRAME4M (~PAGE4M_MASK) pde_pa = ((u_long)pde & PG_FRAME4M) + (va & PAGE4M_MASK); s = _kvm_pa2off(kd, pde_pa, &ofs); - if (s < sizeof pde) { - _kvm_syserr(kd, kd->program, - "_kvm_vatop: pde_pa not found"); + if (s == 0) { + _kvm_err(kd, kd->program, + "_kvm_vatop: 4MB page address not in dump"); goto invalid; } *pa = ofs; @@ -391,9 +391,9 @@ _kvm_vatop_pae(kvm_t *kd, u_long va, off_t *pa) #define PG_FRAME2M (~PAGE2M_MASK) pde_pa = ((u_long)pde & PG_FRAME2M) + (va & PAGE2M_MASK); s = _kvm_pa2off(kd, pde_pa, &ofs); - if (s < sizeof pde) { - _kvm_syserr(kd, kd->program, - "_kvm_vatop_pae: pde_pa not found"); + if (s == 0) { + _kvm_err(kd, kd->program, + "_kvm_vatop: 2MB page address not in dump"); goto invalid; } *pa = ofs; -- cgit v1.1 From 7fec43a93f7ad0b6a048296d2081ce2ec81a228e Mon Sep 17 00:00:00 2001 From: ume Date: Wed, 18 Nov 2009 14:40:00 +0000 Subject: MFC r199188: ANSIfy. --- lib/libc/net/ip6opt.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/ip6opt.c b/lib/libc/net/ip6opt.c index 7b65d06..d999fd4 100644 --- a/lib/libc/net/ip6opt.c +++ b/lib/libc/net/ip6opt.c @@ -55,8 +55,7 @@ static void inet6_insert_padopt(u_char *p, int len); * byte, the length byte, and the option data. */ int -inet6_option_space(nbytes) - int nbytes; +inet6_option_space(int nbytes) { nbytes += 2; /* we need space for nxt-hdr and length fields */ return(CMSG_SPACE((nbytes + 7) & ~7)); @@ -68,10 +67,7 @@ inet6_option_space(nbytes) * success or -1 on an error. */ int -inet6_option_init(bp, cmsgp, type) - void *bp; - struct cmsghdr **cmsgp; - int type; +inet6_option_init(void *bp, struct cmsghdr **cmsgp, int type) { struct cmsghdr *ch = (struct cmsghdr *)bp; @@ -98,11 +94,8 @@ inet6_option_init(bp, cmsgp, type) * earlier. It must have a value between 0 and 7, inclusive. */ int -inet6_option_append(cmsg, typep, multx, plusy) - struct cmsghdr *cmsg; - const u_int8_t *typep; - int multx; - int plusy; +inet6_option_append(struct cmsghdr *cmsg, const u_int8_t *typep, int multx, + int plusy) { int padlen, optlen, off; u_char *bp = (u_char *)cmsg + cmsg->cmsg_len; @@ -171,11 +164,7 @@ inet6_option_append(cmsg, typep, multx, plusy) * */ u_int8_t * -inet6_option_alloc(cmsg, datalen, multx, plusy) - struct cmsghdr *cmsg; - int datalen; - int multx; - int plusy; +inet6_option_alloc(struct cmsghdr *cmsg, int datalen, int multx, int plusy) { int padlen, off; u_int8_t *bp = (u_char *)cmsg + cmsg->cmsg_len; @@ -238,9 +227,7 @@ inet6_option_alloc(cmsg, datalen, multx, plusy) * (RFC 2292, 6.3.5) */ int -inet6_option_next(cmsg, tptrp) - const struct cmsghdr *cmsg; - u_int8_t **tptrp; +inet6_option_next(const struct cmsghdr *cmsg, u_int8_t **tptrp) { struct ip6_ext *ip6e; int hdrlen, optlen; @@ -296,10 +283,7 @@ inet6_option_next(cmsg, tptrp) * it's a typo. The variable should be type of u_int8_t **. 
*/ int -inet6_option_find(cmsg, tptrp, type) - const struct cmsghdr *cmsg; - u_int8_t **tptrp; - int type; +inet6_option_find(const struct cmsghdr *cmsg, u_int8_t **tptrp, int type) { struct ip6_ext *ip6e; int hdrlen, optlen; @@ -352,8 +336,7 @@ inet6_option_find(cmsg, tptrp, type) * calculated length and the limitation of the buffer. */ static int -ip6optlen(opt, lim) - u_int8_t *opt, *lim; +ip6optlen(u_int8_t *opt, u_int8_t *lim) { int optlen; -- cgit v1.1 From 1b5e05542f3a6672d37c7d6c078750aebfaaefbc Mon Sep 17 00:00:00 2001 From: brueffer Date: Fri, 20 Nov 2009 06:55:35 +0000 Subject: MFC: r199255, r199257 Improved the manpage description. The committed wording was provided by jhb. Remove a note about vfork(4) going to be eliminated, it's here to stay. --- lib/libc/sys/vfork.2 | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/vfork.2 b/lib/libc/sys/vfork.2 index 928130b..3cd3368 100644 --- a/lib/libc/sys/vfork.2 +++ b/lib/libc/sys/vfork.2 @@ -28,12 +28,12 @@ .\" @(#)vfork.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd June 4, 1993 +.Dd November 13, 2009 .Dt VFORK 2 .Os .Sh NAME .Nm vfork -.Nd spawn new process in a virtual memory efficient way +.Nd create a new process without copying the address space .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -113,14 +113,6 @@ The system call appeared in .Bx 2.9 . .Sh BUGS -This system call will be eliminated when proper system sharing -mechanisms are implemented. -Users should not depend on the memory -sharing semantics of -.Fn vfork -as it will, in that case, be made synonymous to -.Xr fork 2 . -.Pp To avoid a possible deadlock situation, processes that are children in the middle of a -- cgit v1.1 From 7f43a11af011e3802f49582d1176cbb79e9b4e00 Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 22 Nov 2009 17:05:46 +0000 Subject: MFC r199083: Add NLS catalogs support to gai_strerror(3). Controlled by NLS define. 
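For example, a caller of getaddrinfo(3) picks the catalog text up transparently; a minimal sketch (the host name is a deliberately unresolvable placeholder):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <locale.h>
    #include <netdb.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            struct addrinfo hints, *res;
            int error;

            (void)setlocale(LC_ALL, "");    /* honor LANG/LC_MESSAGES */
            memset(&hints, 0, sizeof(hints));
            hints.ai_socktype = SOCK_STREAM;
            error = getaddrinfo("no.such.host.example", "http", &hints,
                &res);
            if (error != 0) {
                    /* With NLS the text comes from the libc catalog
                     * (set 3) for the current locale; the built-in
                     * English strings remain the fallback. */
                    fprintf(stderr, "%s\n", gai_strerror(error));
                    return (1);
            }
            freeaddrinfo(res);
            return (0);
    }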
--- lib/libc/net/gai_strerror.c | 58 +++++++++++++++++++++++++++++++++++++++++++++ lib/libc/nls/C.msg | 36 ++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) (limited to 'lib') diff --git a/lib/libc/net/gai_strerror.c b/lib/libc/net/gai_strerror.c index 5611559..4f60f2d 100644 --- a/lib/libc/net/gai_strerror.c +++ b/lib/libc/net/gai_strerror.c @@ -30,7 +30,17 @@ #include __FBSDID("$FreeBSD$"); +#include "namespace.h" #include +#if defined(NLS) +#include +#include +#include +#include +#include +#include "reentrant.h" +#endif +#include "un-namespace.h" /* Entries EAI_ADDRFAMILY (1) and EAI_NODATA (7) are obsoleted, but left */ /* for backward compatibility with userland code prior to 2553bis-02 */ @@ -52,9 +62,57 @@ static const char *ai_errlist[] = { "Argument buffer overflow" /* EAI_OVERFLOW */ }; +#if defined(NLS) +static char gai_buf[NL_TEXTMAX]; +static once_t gai_init_once = ONCE_INITIALIZER; +static thread_key_t gai_key; +static int gai_keycreated = 0; + +static void +gai_keycreate(void) +{ + gai_keycreated = (thr_keycreate(&gai_key, free) == 0); +} +#endif + const char * gai_strerror(int ecode) { +#if defined(NLS) + nl_catd catd; + char *buf; + + if (thr_main() != 0) + buf = gai_buf; + else { + if (thr_once(&gai_init_once, gai_keycreate) != 0 || + !gai_keycreated) + goto thr_err; + if ((buf = thr_getspecific(gai_key)) == NULL) { + if ((buf = malloc(sizeof(gai_buf))) == NULL) + goto thr_err; + if (thr_setspecific(gai_key, buf) != 0) { + free(buf); + goto thr_err; + } + } + } + + catd = catopen("libc", NL_CAT_LOCALE); + if (ecode > 0 && ecode < EAI_MAX) + strlcpy(buf, catgets(catd, 3, ecode, ai_errlist[ecode]), + sizeof(gai_buf)); + else if (ecode == 0) + strlcpy(buf, catgets(catd, 3, NL_MSGMAX - 1, "Success"), + sizeof(gai_buf)); + else + strlcpy(buf, catgets(catd, 3, NL_MSGMAX, "Unknown error"), + sizeof(gai_buf)); + catclose(catd); + return buf; + +thr_err: +#endif if (ecode >= 0 && ecode < EAI_MAX) return ai_errlist[ecode]; return "Unknown error"; diff --git a/lib/libc/nls/C.msg b/lib/libc/nls/C.msg index aa2c4cc..a8286f1 100644 --- a/lib/libc/nls/C.msg +++ b/lib/libc/nls/C.msg @@ -247,3 +247,39 @@ $ SIGUSR1 30 User defined signal 1 $ SIGUSR2 31 User defined signal 2 +$ +$ gai_strerror() support catalog +$ +$set 3 +$ 1 (obsolete) +1 Address family for hostname not supported +$ EAI_AGAIN +2 Temporary failure in name resolution +$ EAI_BADFLAGS +3 Invalid value for ai_flags +$ EAI_FAIL +4 Non-recoverable failure in name resolution +$ EAI_FAMILY +5 ai_family not supported +$ EAI_MEMORY +6 Memory allocation failure +$ 7 (obsolete) +7 No address associated with hostname +$ EAI_NONAME +8 hostname nor servname provided, or not known +$ EAI_SERVICE +9 servname not supported for ai_socktype +$ EAI_SOCKTYPE +10 ai_socktype not supported +$ EAI_SYSTEM +11 System error returned in errno +$ EAI_BADHINTS +12 Invalid value for hints +$ EAI_PROTOCOL +13 Resolved protocol is unknown +$ EAI_OVERFLOW +14 Argument buffer overflow +$ 0 +32766 Success +$ NL_MSGMAX +32767 Unknown error -- cgit v1.1 From 449af2da885b66a5779083760a82df7a3fd17c60 Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 22 Nov 2009 17:08:52 +0000 Subject: MFC r199092: Add gai_strerror() catalog for ja_JP.UTF-8 and ja_JP.eucJP. 
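These catalogs feed the standard catgets(3) lookup that gai_strerror() now performs; roughly (a sketch, assuming the ja_JP.UTF-8 locale and the installed libc catalog):

    #include <locale.h>
    #include <nl_types.h>
    #include <stdio.h>

    int
    main(void)
    {
            nl_catd catd;

            (void)setlocale(LC_ALL, "ja_JP.UTF-8");
            /* "libc" and set 3 match gai_strerror(); message 2 is
             * EAI_AGAIN, and the last argument is the fallback. */
            catd = catopen("libc", NL_CAT_LOCALE);
            printf("%s\n", catgets(catd, 3, 2,
                "Temporary failure in name resolution"));
            if (catd != (nl_catd)-1)
                    catclose(catd);
            return (0);
    }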
--- lib/libc/nls/ja_JP.UTF-8.msg | 36 ++++++++++++++++++++++++++++++++++++
 lib/libc/nls/ja_JP.eucJP.msg | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

(limited to 'lib')

diff --git a/lib/libc/nls/ja_JP.UTF-8.msg b/lib/libc/nls/ja_JP.UTF-8.msg
index 9ee98d1..b4d2144 100644
--- a/lib/libc/nls/ja_JP.UTF-8.msg
+++ b/lib/libc/nls/ja_JP.UTF-8.msg
@@ -247,3 +247,39 @@ $ SIGUSR1
 30 ユーザ定義シグナル 1
 $ SIGUSR2
 31 ユーザ定義シグナル 2
+$
+$ gai_strerror() support catalog
+$
+$set 3
+$ 1 (obsolete)
+1 ホスト名のアドレスファミリーはサポートされません
+$ EAI_AGAIN
+2 名前解決での一時的な失敗
+$ EAI_BADFLAGS
+3 ai_flags の値が無効
+$ EAI_FAIL
+4 名前解決での回復不能な失敗
+$ EAI_FAMILY
+5 ai_family はサポートされません
+$ EAI_MEMORY
+6 メモリ割り当て失敗
+$ 7 (obsolete)
+7 ホスト名に対応するアドレスはありません
+$ EAI_NONAME
+8 ホスト名かサービス名が指定されない、または不明
+$ EAI_SERVICE
+9 サービス名は ai_socktype に対してサポートされません
+$ EAI_SOCKTYPE
+10 ai_socktype はサポートされません
+$ EAI_SYSTEM
+11 システムエラー、errno 参照
+$ EAI_BADHINTS
+12 hints の値が無効
+$ EAI_PROTOCOL
+13 解決されたプロトコルは不明です
+$ EAI_OVERFLOW
+14 引数バッファオーバフロー
+$ 0
+32766 成功
+$ NL_MSGMAX
+32767 不明なエラー
diff --git a/lib/libc/nls/ja_JP.eucJP.msg b/lib/libc/nls/ja_JP.eucJP.msg
index cba0157..461a39b 100644
--- a/lib/libc/nls/ja_JP.eucJP.msg
+++ b/lib/libc/nls/ja_JP.eucJP.msg
@@ -247,3 +247,39 @@ $ SIGUSR1
 30 ユーザ定義シグナル 1
 $ SIGUSR2
 31 ユーザ定義シグナル 2
+$
+$ gai_strerror() support catalog
+$
+$set 3
+$ 1 (obsolete)
+1 ホスト名のアドレスファミリーはサポートされません
+$ EAI_AGAIN
+2 名前解決での一時的な失敗
+$ EAI_BADFLAGS
+3 ai_flags の値が無効
+$ EAI_FAIL
+4 名前解決での回復不能な失敗
+$ EAI_FAMILY
+5 ai_family はサポートされません
+$ EAI_MEMORY
+6 メモリ割り当て失敗
+$ 7 (obsolete)
+7 ホスト名に対応するアドレスはありません
+$ EAI_NONAME
+8 ホスト名かサービス名が指定されない、または不明
+$ EAI_SERVICE
+9 サービス名は ai_socktype に対してサポートされません
+$ EAI_SOCKTYPE
+10 ai_socktype はサポートされません
+$ EAI_SYSTEM
+11 システムエラー、errno 参照
+$ EAI_BADHINTS
+12 hints の値が無効
+$ EAI_PROTOCOL
+13 解決されたプロトコルは不明です
+$ EAI_OVERFLOW
+14 引数バッファオーバフロー
+$ 0
+32766 成功
+$ NL_MSGMAX
+32767 不明なエラー
-- cgit v1.1

From 27cff457a53b959e6782708f0e229d181d6e740e Mon Sep 17 00:00:00 2001
From: brueffer
Date: Mon, 23 Nov 2009 09:10:08 +0000
Subject: MFC: r199317

Fix a memory leak in acl_from_text() in case the conversion succeeded.
---
 lib/libc/posix1e/acl_from_text.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/libc/posix1e/acl_from_text.c b/lib/libc/posix1e/acl_from_text.c
index 98c5426..c600987 100644
--- a/lib/libc/posix1e/acl_from_text.c
+++ b/lib/libc/posix1e/acl_from_text.c
@@ -257,6 +257,7 @@ acl_from_text(const char *buf_p)
 	}
 #endif
 
+	free(mybuf_p);
 	return(acl);
 
 error_label:
-- cgit v1.1

From 5bb9e329e2ecafc5d1a5e581a7b4c5f57afa443e Mon Sep 17 00:00:00 2001
From: brueffer
Date: Mon, 23 Nov 2009 09:44:08 +0000
Subject: MFC: r199320

Fix grammar.
---
 lib/libc/locale/nl_langinfo.3 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libc/locale/nl_langinfo.3 b/lib/libc/locale/nl_langinfo.3
index 8de1682..789cac2 100644
--- a/lib/libc/locale/nl_langinfo.3
+++ b/lib/libc/locale/nl_langinfo.3
@@ -53,7 +53,7 @@ with a category corresponding to the category of
 or to the category
 .Dv LC_ALL ,
-may overwrite buffer pointed by the return value.
+may overwrite the buffer pointed to by the return value. .Sh RETURN VALUES In a locale where langinfo data is not defined, .Fn nl_langinfo -- cgit v1.1 From 9c731efbb9ec1f6227530c53d4a2682dc0621d22 Mon Sep 17 00:00:00 2001 From: marcel Date: Tue, 24 Nov 2009 03:23:40 +0000 Subject: MFC r198450: Implement _umtx_op_err() for ia64. --- lib/libthr/arch/ia64/Makefile.inc | 2 +- lib/libthr/arch/ia64/ia64/_umtx_op_err.S | 35 +++++++++++++++++++++++++++++++ lib/libthr/arch/ia64/include/pthread_md.h | 2 ++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 lib/libthr/arch/ia64/ia64/_umtx_op_err.S (limited to 'lib') diff --git a/lib/libthr/arch/ia64/Makefile.inc b/lib/libthr/arch/ia64/Makefile.inc index 1c0ce94..160cdf6 100644 --- a/lib/libthr/arch/ia64/Makefile.inc +++ b/lib/libthr/arch/ia64/Makefile.inc @@ -2,4 +2,4 @@ .PATH: ${.CURDIR}/arch/${MACHINE_ARCH}/${MACHINE_ARCH} -SRCS+= pthread_md.c +SRCS+= _umtx_op_err.S pthread_md.c diff --git a/lib/libthr/arch/ia64/ia64/_umtx_op_err.S b/lib/libthr/arch/ia64/ia64/_umtx_op_err.S new file mode 100644 index 0000000..a712210 --- /dev/null +++ b/lib/libthr/arch/ia64/ia64/_umtx_op_err.S @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 2009 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +ENTRY(_umtx_op_err, 5) + CALLSYS_NOERROR(_umtx_op) + br.ret.sptk.few rp +END(_umtx_op_err) diff --git a/lib/libthr/arch/ia64/include/pthread_md.h b/lib/libthr/arch/ia64/include/pthread_md.h index 0cff7c9..69b3eec 100644 --- a/lib/libthr/arch/ia64/include/pthread_md.h +++ b/lib/libthr/arch/ia64/include/pthread_md.h @@ -33,6 +33,8 @@ #define CPU_SPINWAIT +#define HAS__UMTX_OP_ERR 1 + #define DTV_OFFSET offsetof(struct tcb, tcb_dtv) /* -- cgit v1.1 From 152a6d5bd58ed4ad0ba006ad52ee3a13702c4fb0 Mon Sep 17 00:00:00 2001 From: roam Date: Wed, 25 Nov 2009 10:52:07 +0000 Subject: Merge rev. 199180 to 8.x-STABLE, the change to isblank(3): Fix the grammar as in the PR, and then some. 
PR: 140454 Submitted by: Jeremy Huddleston --- lib/libc/locale/isblank.3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/locale/isblank.3 b/lib/libc/locale/isblank.3 index 4cc6bbb..6734dcc 100644 --- a/lib/libc/locale/isblank.3 +++ b/lib/libc/locale/isblank.3 @@ -50,9 +50,9 @@ For any locale, this includes the following standard characters: .It "\&``\et''\t`` ''" .El .Pp -In the "C" locale +In the "C" locale, a successful .Fn isblank -successful test is limited to this characters only. +test is limited to these characters only. The value of the argument must be representable as an .Vt "unsigned char" or the value of -- cgit v1.1 From 1f98e49ef9b9bbf5080150bb8c76d67a354231f5 Mon Sep 17 00:00:00 2001 From: fabient Date: Tue, 1 Dec 2009 23:23:52 +0000 Subject: MFC 198339: Fix the NO_PROXY handling. PR: 139751 --- lib/libfetch/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libfetch/common.c b/lib/libfetch/common.c index d49336a..c54804b 100644 --- a/lib/libfetch/common.c +++ b/lib/libfetch/common.c @@ -772,7 +772,7 @@ fetch_no_proxy_match(const char *host) break; d_len = q - p; - if (d_len > 0 && h_len > d_len && + if (d_len > 0 && h_len >= d_len && strncasecmp(host + h_len - d_len, p, d_len) == 0) { /* domain name matches */ -- cgit v1.1 From 42056712b21a97e733139da7d0667caa7cd02c54 Mon Sep 17 00:00:00 2001 From: wollman Date: Wed, 2 Dec 2009 02:47:29 +0000 Subject: MFC revs 199781,199782,199784,199785,199786: Eliminate dead stores. In __mbsconv(), if prec was zero, nconv could have been used uninitialized. Initialize it to a safe value so that there's no chance of returning an error if stack garbage happens to be equal to (size_t)-1 or (size_t)-2. In svc_raw_reply(), don't leave stat uninitialized if the MSG_ACCEPTED && SUCCESS case succeeds. The stack garbage might be zero. In clnt_raw_create(), avoid minor race condition initializing the file-scope variable clntraw_private. Found by: Clang static analyzer --- lib/libc/gen/getcap.c | 5 ++--- lib/libc/gen/getusershell.c | 2 +- lib/libc/gen/wordexp.c | 2 +- lib/libc/rpc/clnt_raw.c | 11 +++++++---- lib/libc/rpc/getnetconfig.c | 4 ++-- lib/libc/rpc/key_call.c | 2 +- lib/libc/rpc/svc_raw.c | 5 ++--- lib/libc/stdio/fgetws.c | 2 +- lib/libc/stdio/fvwrite.c | 2 +- lib/libc/stdio/vfwprintf.c | 2 +- lib/libc/yp/yplib.c | 2 +- 11 files changed, 20 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/getcap.c b/lib/libc/gen/getcap.c index 32d66d48..2700d3d 100644 --- a/lib/libc/gen/getcap.c +++ b/lib/libc/gen/getcap.c @@ -647,7 +647,7 @@ int cgetnext(char **bp, char **db_array) { size_t len; - int done, hadreaderr, i, savederrno, status; + int done, hadreaderr, savederrno, status; char *cp, *line, *rp, *np, buf[BSIZE], nbuf[BSIZE]; u_int dummy; @@ -658,7 +658,7 @@ cgetnext(char **bp, char **db_array) (void)cgetclose(); return (-1); } - for(;;) { + for (;;) { if (toprec && !gottoprec) { gottoprec = 1; line = toprec; @@ -709,7 +709,6 @@ cgetnext(char **bp, char **db_array) /* * Line points to a name line. 
*/ - i = 0; done = 0; np = nbuf; for (;;) { diff --git a/lib/libc/gen/getusershell.c b/lib/libc/gen/getusershell.c index b0da2a1..d09b50c 100644 --- a/lib/libc/gen/getusershell.c +++ b/lib/libc/gen/getusershell.c @@ -124,7 +124,7 @@ _local_initshells(rv, cb_data, ap) if ((fp = fopen(_PATH_SHELLS, "r")) == NULL) return NS_UNAVAIL; - sp = cp = line; + cp = line; while (fgets(cp, MAXPATHLEN + 1, fp) != NULL) { while (*cp != '#' && *cp != '/' && *cp != '\0') cp++; diff --git a/lib/libc/gen/wordexp.c b/lib/libc/gen/wordexp.c index a437543..11fb697 100644 --- a/lib/libc/gen/wordexp.c +++ b/lib/libc/gen/wordexp.c @@ -282,7 +282,7 @@ we_check(const char *words, int flags) if (c == '\0' || level != 0) return (WRDE_SYNTAX); } else - c = *--words; + --words; break; default: break; diff --git a/lib/libc/rpc/clnt_raw.c b/lib/libc/rpc/clnt_raw.c index 9d34a3d..cd3a384 100644 --- a/lib/libc/rpc/clnt_raw.c +++ b/lib/libc/rpc/clnt_raw.c @@ -92,13 +92,13 @@ clnt_raw_create(prog, vers) rpcprog_t prog; rpcvers_t vers; { - struct clntraw_private *clp = clntraw_private; + struct clntraw_private *clp; struct rpc_msg call_msg; - XDR *xdrs = &clp->xdr_stream; - CLIENT *client = &clp->client_object; + XDR *xdrs; + CLIENT *client; mutex_lock(&clntraw_lock); - if (clp == NULL) { + if ((clp = clntraw_private) == NULL) { clp = (struct clntraw_private *)calloc(1, sizeof (*clp)); if (clp == NULL) { mutex_unlock(&clntraw_lock); @@ -110,6 +110,9 @@ clnt_raw_create(prog, vers) clp->_raw_buf = __rpc_rawcombuf; clntraw_private = clp; } + xdrs = &clp->xdr_stream; + client = &clp->client_object; + /* * pre-serialize the static part of the call msg and stash it away */ diff --git a/lib/libc/rpc/getnetconfig.c b/lib/libc/rpc/getnetconfig.c index 9baa224..e5db51a 100644 --- a/lib/libc/rpc/getnetconfig.c +++ b/lib/libc/rpc/getnetconfig.c @@ -412,13 +412,13 @@ void *handlep; * Noone needs these entries anymore, then frees them. * Make sure all info in netconfig_info structure has been reinitialized. 
*/ - q = p = ni.head; + q = ni.head; ni.eof = ni.ref = 0; ni.head = NULL; ni.tail = NULL; mutex_unlock(&ni_lock); - while (q) { + while (q != NULL) { p = q->next; if (q->ncp->nc_lookups != NULL) free(q->ncp->nc_lookups); free(q->ncp); diff --git a/lib/libc/rpc/key_call.c b/lib/libc/rpc/key_call.c index 615f24d..6cc1139 100644 --- a/lib/libc/rpc/key_call.c +++ b/lib/libc/rpc/key_call.c @@ -302,7 +302,7 @@ int vers; void *localhandle; struct netconfig *nconf; struct netconfig *tpconf; - struct key_call_private *kcp = key_call_private_main; + struct key_call_private *kcp; struct timeval wait_time; struct utsname u; int main_thread; diff --git a/lib/libc/rpc/svc_raw.c b/lib/libc/rpc/svc_raw.c index 7492046..67bcba1 100644 --- a/lib/libc/rpc/svc_raw.c +++ b/lib/libc/rpc/svc_raw.c @@ -176,9 +176,8 @@ svc_raw_reply(xprt, msg) msg->acpted_rply.ar_results.proc = (xdrproc_t) xdr_void; msg->acpted_rply.ar_results.where = NULL; - if (!xdr_replymsg(xdrs, msg) || - !SVCAUTH_WRAP(&SVC_AUTH(xprt), xdrs, xdr_proc, xdr_where)) - stat = FALSE; + stat = xdr_replymsg(xdrs, msg) && + SVCAUTH_WRAP(&SVC_AUTH(xprt), xdrs, xdr_proc, xdr_where); } else { stat = xdr_replymsg(xdrs, msg); } diff --git a/lib/libc/stdio/fgetws.c b/lib/libc/stdio/fgetws.c index bbc0299..550843d 100644 --- a/lib/libc/stdio/fgetws.c +++ b/lib/libc/stdio/fgetws.c @@ -89,7 +89,7 @@ fgetws(wchar_t * __restrict ws, int n, FILE * __restrict fp) if (!__mbsinit(&fp->_mbstate)) /* Incomplete character */ goto error; - *wsp++ = L'\0'; + *wsp = L'\0'; FUNLOCKFILE(fp); return (ws); diff --git a/lib/libc/stdio/fvwrite.c b/lib/libc/stdio/fvwrite.c index fd69eb9..7206676 100644 --- a/lib/libc/stdio/fvwrite.c +++ b/lib/libc/stdio/fvwrite.c @@ -60,7 +60,7 @@ __sfvwrite(fp, uio) char *nl; int nlknown, nldist; - if ((len = uio->uio_resid) == 0) + if (uio->uio_resid == 0) return (0); /* make sure we can write */ if (prepwrite(fp) != 0) diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c index f3768de..d34f559 100644 --- a/lib/libc/stdio/vfwprintf.c +++ b/lib/libc/stdio/vfwprintf.c @@ -293,7 +293,7 @@ __mbsconv(char *mbsarg, int prec) * number of characters to print. */ p = mbsarg; - insize = nchars = 0; + insize = nchars = nconv = 0; mbs = initial_mbs; while (nchars != (size_t)prec) { nconv = mbrlen(p, MB_CUR_MAX, &mbs); diff --git a/lib/libc/yp/yplib.c b/lib/libc/yp/yplib.c index eb5c34c..87d16dd 100644 --- a/lib/libc/yp/yplib.c +++ b/lib/libc/yp/yplib.c @@ -241,7 +241,7 @@ static bool_t ypmatch_cache_lookup(struct dom_binding *ypdb, char *map, keydat *key, valdat *val) { - struct ypmatch_ent *c = ypdb->cache; + struct ypmatch_ent *c; ypmatch_cache_expire(ypdb); -- cgit v1.1 From c28b64e0d9f31cb4da220ab09958fd4adc90e69d Mon Sep 17 00:00:00 2001 From: brueffer Date: Fri, 4 Dec 2009 07:08:15 +0000 Subject: MFC: r199988 Add an .Nm for strncat. --- lib/libc/string/strcat.3 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/string/strcat.3 b/lib/libc/string/strcat.3 index 0290994..4c9fec9 100644 --- a/lib/libc/string/strcat.3 +++ b/lib/libc/string/strcat.3 @@ -32,11 +32,12 @@ .\" @(#)strcat.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd June 4, 1993 +.Dd December 1, 2009 .Dt STRCAT 3 .Os .Sh NAME -.Nm strcat +.Nm strcat , +.Nm strncat .Nd concatenate strings .Sh LIBRARY .Lb libc -- cgit v1.1 From 38a74188d15dc3ef17034d3ed9f7b3fbe217852f Mon Sep 17 00:00:00 2001 From: jh Date: Fri, 4 Dec 2009 11:26:52 +0000 Subject: MFC r199843: Clarify that the value of the fts_info field is different in post-order. 
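Illustration of the clarified behavior: fts_read(3) returns every
directory twice, and only the fts_info field distinguishes the two
visits (a minimal sketch):

    #include <sys/types.h>
    #include <sys/stat.h>
    #include <fts.h>
    #include <stdio.h>

    int
    main(void)
    {
            char *argv[] = { ".", NULL };
            FTS *fts;
            FTSENT *e;

            if ((fts = fts_open(argv, FTS_PHYSICAL, NULL)) == NULL)
                    return (1);
            while ((e = fts_read(fts)) != NULL) {
                    if (e->fts_info == FTS_D)         /* pre-order visit */
                            printf("pre:  %s\n", e->fts_path);
                    else if (e->fts_info == FTS_DP)   /* post-order visit */
                            printf("post: %s\n", e->fts_path);
            }
            fts_close(fts);
            return (0);
    }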
Approved by: trasz (mentor) --- lib/libc/gen/fts.3 | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/fts.3 b/lib/libc/gen/fts.3 index 8e1a1e4..d2473be 100644 --- a/lib/libc/gen/fts.3 +++ b/lib/libc/gen/fts.3 @@ -28,7 +28,7 @@ .\" @(#)fts.3 8.5 (Berkeley) 4/16/94 .\" $FreeBSD$ .\" -.Dd January 26, 2008 +.Dd November 25, 2009 .Dt FTS 3 .Os .Sh NAME @@ -198,10 +198,9 @@ A directory being visited in post-order. The contents of the .Vt FTSENT structure will be unchanged from when -it was returned in pre-order, i.e., with the +the directory was visited in pre-order, except for the .Fa fts_info -field set to -.Dv FTS_D . +field. .It Dv FTS_ERR This is an error return, and the .Fa fts_errno -- cgit v1.1 From 474d50676beda16b58d9b04cf350f9d2e3daf0bd Mon Sep 17 00:00:00 2001 From: jilles Date: Sun, 6 Dec 2009 22:14:58 +0000 Subject: MFC r198406: wordexp(3): fix some bugs with signals and long outputs * retry various system calls on EINTR * retry the rest after a short read (common if there is more than about 1K of output) * block SIGCHLD like system(3) does (note that this does not and cannot work fully in threaded programs, they will need to be careful with wait functions) PR: 90580 --- lib/libc/gen/wordexp.c | 76 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/wordexp.c b/lib/libc/gen/wordexp.c index 11fb697..bcab1f5 100644 --- a/lib/libc/gen/wordexp.c +++ b/lib/libc/gen/wordexp.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -73,6 +75,24 @@ wordexp(const char * __restrict words, wordexp_t * __restrict we, int flags) return (0); } +static size_t +we_read_fully(int fd, char *buffer, size_t len) +{ + size_t done; + ssize_t nread; + + done = 0; + do { + nread = _read(fd, buffer + done, len - done); + if (nread == -1 && errno == EINTR) + continue; + if (nread <= 0) + break; + done += nread; + } while (done != len); + return done; +} + /* * we_askshell -- * Use the `wordexp' /bin/sh builtin function to do most of the work @@ -90,20 +110,31 @@ we_askshell(const char *words, wordexp_t *we, int flags) size_t sofs; /* Offset into we->we_strings */ size_t vofs; /* Offset into we->we_wordv */ pid_t pid; /* Process ID of child */ + pid_t wpid; /* waitpid return value */ int status; /* Child exit status */ + int error; /* Our return value */ + int serrno; /* errno to return */ char *ifs; /* IFS env. var. */ char *np, *p; /* Handy pointers */ char *nstrings; /* Temporary for realloc() */ char **nwv; /* Temporary for realloc() */ + sigset_t newsigblock, oldsigblock; + serrno = errno; if ((ifs = getenv("IFS")) == NULL) ifs = " \t\n"; if (pipe(pdes) < 0) return (WRDE_NOSPACE); /* XXX */ + (void)sigemptyset(&newsigblock); + (void)sigaddset(&newsigblock, SIGCHLD); + (void)_sigprocmask(SIG_BLOCK, &newsigblock, &oldsigblock); if ((pid = fork()) < 0) { + serrno = errno; _close(pdes[0]); _close(pdes[1]); + (void)_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); + errno = serrno; return (WRDE_NOSPACE); /* XXX */ } else if (pid == 0) { @@ -114,6 +145,7 @@ we_askshell(const char *words, wordexp_t *we, int flags) int devnull; char *cmd; + (void)_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); _close(pdes[0]); if (_dup2(pdes[1], STDOUT_FILENO) < 0) _exit(1); @@ -139,10 +171,11 @@ we_askshell(const char *words, wordexp_t *we, int flags) * the expanded words separated by nulls. 
*/ _close(pdes[1]); - if (_read(pdes[0], wbuf, 8) != 8 || _read(pdes[0], bbuf, 8) != 8) { - _close(pdes[0]); - _waitpid(pid, &status, 0); - return (flags & WRDE_UNDEF ? WRDE_BADVAL : WRDE_SYNTAX); + if (we_read_fully(pdes[0], wbuf, 8) != 8 || + we_read_fully(pdes[0], bbuf, 8) != 8) { + error = flags & WRDE_UNDEF ? WRDE_BADVAL : WRDE_SYNTAX; + serrno = errno; + goto cleanup; } wbuf[8] = bbuf[8] = '\0'; nwords = strtol(wbuf, NULL, 16); @@ -162,33 +195,38 @@ we_askshell(const char *words, wordexp_t *we, int flags) if ((nwv = realloc(we->we_wordv, (we->we_wordc + 1 + (flags & WRDE_DOOFFS ? we->we_offs : 0)) * sizeof(char *))) == NULL) { - _close(pdes[0]); - _waitpid(pid, &status, 0); - return (WRDE_NOSPACE); + error = WRDE_NOSPACE; + goto cleanup; } we->we_wordv = nwv; if ((nstrings = realloc(we->we_strings, we->we_nbytes)) == NULL) { - _close(pdes[0]); - _waitpid(pid, &status, 0); - return (WRDE_NOSPACE); + error = WRDE_NOSPACE; + goto cleanup; } for (i = 0; i < vofs; i++) if (we->we_wordv[i] != NULL) we->we_wordv[i] += nstrings - we->we_strings; we->we_strings = nstrings; - if (_read(pdes[0], we->we_strings + sofs, nbytes) != nbytes) { - _close(pdes[0]); - _waitpid(pid, &status, 0); - return (flags & WRDE_UNDEF ? WRDE_BADVAL : WRDE_SYNTAX); + if (we_read_fully(pdes[0], we->we_strings + sofs, nbytes) != nbytes) { + error = flags & WRDE_UNDEF ? WRDE_BADVAL : WRDE_SYNTAX; + serrno = errno; + goto cleanup; } - if (_waitpid(pid, &status, 0) < 0 || !WIFEXITED(status) || - WEXITSTATUS(status) != 0) { - _close(pdes[0]); - return (flags & WRDE_UNDEF ? WRDE_BADVAL : WRDE_SYNTAX); - } + error = 0; +cleanup: _close(pdes[0]); + do + wpid = _waitpid(pid, &status, 0); + while (wpid < 0 && errno == EINTR); + (void)_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); + if (error != 0) { + errno = serrno; + return (error); + } + if (wpid < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) + return (flags & WRDE_UNDEF ? WRDE_BADVAL : WRDE_SYNTAX); /* * Break the null-terminated expanded word strings out into -- cgit v1.1 From 6b90f6c201275c1edfa6b986407d0f94475c1e68 Mon Sep 17 00:00:00 2001 From: brueffer Date: Mon, 7 Dec 2009 08:38:44 +0000 Subject: MFC: r200019 Fix the dprintf() prototype. --- lib/libc/stdio/printf.3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3 index 27d5bf0..2587aa2 100644 --- a/lib/libc/stdio/printf.3 +++ b/lib/libc/stdio/printf.3 @@ -32,7 +32,7 @@ .\" @(#)printf.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd March 3, 2009 +.Dd December 2, 2009 .Dt PRINTF 3 .Os .Sh NAME @@ -55,7 +55,7 @@ .Ft int .Fn asprintf "char **ret" "const char *format" ... .Ft int -.Fn dprintf "int" "const char * restrict format" ... +.Fn dprintf "int fd" "const char * restrict format" ... .In stdarg.h .Ft int .Fn vprintf "const char * restrict format" "va_list ap" -- cgit v1.1 From f95fa4d4d01b6911513c52f88083a311657bda24 Mon Sep 17 00:00:00 2001 From: jhb Date: Mon, 7 Dec 2009 19:07:45 +0000 Subject: MFC 200061: The fd_mask type is an unsigned long, not an int, so treat the mask as a long instead of an int when examining the results of select() to look for RPC requests. Previously this routine would ignore RPC requests to sockets whose file descriptor mod 64 was greater than 31 on a 64-bit platform. 
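The failure mode is easy to demonstrate outside the library; a sketch,
assuming an LP64 platform where fd_mask is a 64-bit long:

    #include <sys/select.h>
    #include <stdio.h>
    #include <strings.h>

    int
    main(void)
    {
            fd_mask mask = (fd_mask)1 << 40;  /* fd 40 ready in this word */
            int bit;

            /* ffs() takes an int, so bit 40 is truncated away... */
            printf("ffs():  %d\n", ffs(mask));   /* prints 0 on LP64 */
            /* ...while ffsl() scans the full long. */
            printf("ffsl(): %d\n", ffsl(mask));  /* prints 41 */
            for (; (bit = ffsl(mask)) != 0; mask ^= (1ul << (bit - 1)))
                    printf("ready: fd %d within this word\n", bit - 1);
            return (0);
    }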
--- lib/libc/rpc/svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/rpc/svc.c b/lib/libc/rpc/svc.c index d205121..282c2be 100644 --- a/lib/libc/rpc/svc.c +++ b/lib/libc/rpc/svc.c @@ -627,8 +627,8 @@ svc_getreqset(readfds) maskp = readfds->fds_bits; for (sock = 0; sock < FD_SETSIZE; sock += NFDBITS) { - for (mask = *maskp++; (bit = ffs(mask)) != 0; - mask ^= (1 << (bit - 1))) { + for (mask = *maskp++; (bit = ffsl(mask)) != 0; + mask ^= (1ul << (bit - 1))) { /* sock has input waiting */ fd = sock + bit - 1; svc_getreq_common(fd); -- cgit v1.1 From 3c2bafe65e8458fc5abe508bf9e336768eda01bb Mon Sep 17 00:00:00 2001 From: delphij Date: Mon, 7 Dec 2009 19:59:28 +0000 Subject: MFC revision 197579 and 199617: Add two new fcntls to enable/disable read-ahead: - F_READAHEAD: specify the amount for sequential access. The amount is specified in bytes and is rounded up to nearest block size. - F_RDAHEAD: Darwin compatible version that use 128KB as the sequential access size. A third argument of zero disables the read-ahead behavior. Please note that the read-ahead amount is also constrainted by sysctl variable, vfs.read_max, which may need to be raised in order to better utilize this feature. Thanks Igor Sysoev for proposing the feature and submitting the original version, and kib@ for his valuable comments. --- lib/libc/sys/fcntl.2 | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2 index a16724c..250cef4 100644 --- a/lib/libc/sys/fcntl.2 +++ b/lib/libc/sys/fcntl.2 @@ -28,7 +28,7 @@ .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd March 8, 2008 +.Dd September 28, 2009 .Dt FCNTL 2 .Os .Sh NAME @@ -241,6 +241,22 @@ will be interrupted if the signal handler has not specified the .Dv SA_RESTART (see .Xr sigaction 2 ) . +.It Dv F_READAHEAD +Set or clear the read ahead amount for sequential access to the third +argument, +.Fa arg , +which is rounded up to the nearest block size. +A zero value in +.Fa arg +turns off read ahead. +.It Dv F_RDAHEAD +Equivalent to Darwin counterpart which sets read ahead amount of 128KB +when the third argument, +.Fa arg +is non-zero. +A zero value in +.Fa arg +turns off read ahead. .El .Pp When a shared lock has been set on a segment of a file, -- cgit v1.1 From 14c9615c2ae5920e4cc43af1d15d6cbee9ab7349 Mon Sep 17 00:00:00 2001 From: fabient Date: Mon, 7 Dec 2009 20:44:43 +0000 Subject: MFC 198433: Not all Intel Core (TM) CPUs implement PMC_CLASS_IAF fixed-function counters. For such CPUs, use an alternate mapping of convenience names to events supported by PMC_CLASS_IAP programmable counters. --- lib/libpmc/libpmc.c | 59 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 76bc89d..ecc697d 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -442,6 +442,10 @@ static struct pmc_event_alias core_aliases[] = { /* * Intel Core2 (Family 6, Model F), Core2Extreme (Family 6, Model 17H) * and Atom (Family 6, model 1CH) PMCs. + * + * We map aliases to events on the fixed-function counters if these + * are present. Note that not all CPUs in this family contain fixed-function + * counters. 
*/ static struct pmc_event_alias core2_aliases[] = { @@ -454,8 +458,22 @@ static struct pmc_event_alias core2_aliases[] = { EV_ALIAS("unhalted-cycles", "iaf-cpu-clk-unhalted.core"), EV_ALIAS(NULL, NULL) }; -#define atom_aliases core2_aliases -#define corei7_aliases core2_aliases + +static struct pmc_event_alias core2_aliases_without_iaf[] = { + EV_ALIAS("branches", "iap-br-inst-retired.any"), + EV_ALIAS("branch-mispredicts", "iap-br-inst-retired.mispred"), + EV_ALIAS("cycles", "tsc-tsc"), + EV_ALIAS("ic-misses", "iap-l1i-misses"), + EV_ALIAS("instructions", "iap-inst-retired.any_p"), + EV_ALIAS("interrupts", "iap-hw-int-rcv"), + EV_ALIAS("unhalted-cycles", "iap-cpu-clk-unhalted.core_p"), + EV_ALIAS(NULL, NULL) +}; + +#define atom_aliases core2_aliases +#define atom_aliases_without_iaf core2_aliases_without_iaf +#define corei7_aliases core2_aliases +#define corei7_aliases_without_iaf core2_aliases_without_iaf #define IAF_KW_OS "os" #define IAF_KW_USR "usr" @@ -2379,6 +2397,10 @@ pmc_init(void) uint32_t abi_version; struct module_stat pmc_modstat; struct pmc_op_getcpuinfo op_cpu_info; +#if defined(__amd64__) || defined(__i386__) + int cpu_has_iaf_counters; + unsigned int t; +#endif if (pmc_syscall != -1) /* already inited */ return (0); @@ -2420,6 +2442,8 @@ pmc_init(void) if (pmc_class_table == NULL) return (-1); + for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) + pmc_class_table[n] = NULL; /* * Fill in the class table. @@ -2427,6 +2451,14 @@ pmc_init(void) n = 0; #if defined(__amd64__) || defined(__i386__) pmc_class_table[n++] = &tsc_class_table_descr; + + /* + * Check if this CPU has fixed function counters. + */ + cpu_has_iaf_counters = 0; + for (t = 0; t < cpu_info.pm_nclass; t++) + if (cpu_info.pm_classes[t].pm_class == PMC_CLASS_IAF) + cpu_has_iaf_counters = 1; #endif #define PMC_MDEP_INIT(C) do { \ @@ -2436,6 +2468,16 @@ pmc_init(void) PMC_TABLE_SIZE(C##_pmc_classes); \ } while (0) +#define PMC_MDEP_INIT_INTEL_V2(C) do { \ + PMC_MDEP_INIT(C); \ + if (cpu_has_iaf_counters) \ + pmc_class_table[n++] = &iaf_class_table_descr; \ + else \ + pmc_mdep_event_aliases = \ + C##_aliases_without_iaf; \ + pmc_class_table[n] = &C##_class_table_descr; \ + } while (0) + /* Configure the event name parser. */ switch (cpu_info.pm_cputype) { #if defined(__i386__) @@ -2461,24 +2503,17 @@ pmc_init(void) pmc_class_table[n] = &k8_class_table_descr; break; case PMC_CPU_INTEL_ATOM: - PMC_MDEP_INIT(atom); - pmc_class_table[n++] = &iaf_class_table_descr; - pmc_class_table[n] = &atom_class_table_descr; + PMC_MDEP_INIT_INTEL_V2(atom); break; case PMC_CPU_INTEL_CORE: PMC_MDEP_INIT(core); - pmc_class_table[n] = &core_class_table_descr; break; case PMC_CPU_INTEL_CORE2: case PMC_CPU_INTEL_CORE2EXTREME: - PMC_MDEP_INIT(core2); - pmc_class_table[n++] = &iaf_class_table_descr; - pmc_class_table[n] = &core2_class_table_descr; + PMC_MDEP_INIT_INTEL_V2(core2); break; case PMC_CPU_INTEL_COREI7: - PMC_MDEP_INIT(corei7); - pmc_class_table[n++] = &iaf_class_table_descr; - pmc_class_table[n] = &corei7_class_table_descr; + PMC_MDEP_INIT_INTEL_V2(corei7); break; case PMC_CPU_INTEL_PIV: PMC_MDEP_INIT(p4); -- cgit v1.1 From c57856a7abab4b7e75e856abab338a796b2a141b Mon Sep 17 00:00:00 2001 From: thompsa Date: Wed, 9 Dec 2009 21:34:38 +0000 Subject: MFC r199575 - fix a transfer cancelling bug/segfault [1] - correct a return code in the transfer cancel function. - add new API function, libusb20_tr_bulk_intr_sync(). 
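The new call wraps the setup/start/wait cycle in a single function; a
usage sketch, assuming the device is already opened and the transfer
already configured via libusb20_tr_open():

    #include <stdio.h>
    #include <stdint.h>
    #include <libusb20.h>

    /* Read up to 64 bytes from an already configured bulk IN endpoint. */
    static int
    read_bulk_sync(struct libusb20_transfer *xfer)
    {
            uint8_t buf[64];
            uint32_t actlen = 0;
            uint8_t status;

            /* 250 ms timeout; the actual byte count returns via &actlen. */
            status = libusb20_tr_bulk_intr_sync(xfer, buf, sizeof(buf),
                &actlen, 250);
            if (status != 0)
                    return (-1);    /* a LIBUSB20_TRANSFER_XXX error code */
            printf("received %u bytes\n", (unsigned)actlen);
            return (0);
    }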
Submitted by: HPS Reported by: Robert Jenssen [1] --- lib/libusb/libusb10.c | 41 +++++++++++++++++++---- lib/libusb/libusb10.h | 4 ++- lib/libusb/libusb20.3 | 27 ++++++++++++++- lib/libusb/libusb20.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++- lib/libusb/libusb20.h | 3 +- 5 files changed, 155 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/libusb/libusb10.c b/lib/libusb/libusb10.c index 55ee1c5..9a5154e 100644 --- a/lib/libusb/libusb10.c +++ b/lib/libusb/libusb10.c @@ -833,8 +833,12 @@ libusb10_complete_transfer(struct libusb20_transfer *pxfer, if (pxfer != NULL) libusb20_tr_set_priv_sc1(pxfer, NULL); + /* set transfer status */ uxfer->status = status; + /* update super transfer state */ + sxfer->state = LIBUSB_SUPER_XFER_ST_NONE; + dev = libusb_get_device(uxfer->dev_handle); TAILQ_INSERT_TAIL(&dev->ctx->tr_done, sxfer, entry); @@ -1111,6 +1115,8 @@ libusb10_submit_transfer_sub(struct libusb20_device *pdev, uint8_t endpoint) return; case 2: sxfer = libusb20_tr_get_priv_sc1(pxfer1); + if (sxfer == NULL) + return; /* cancelling */ if (sxfer->rem_len) return; /* cannot queue another one */ /* swap transfers */ @@ -1118,6 +1124,8 @@ libusb10_submit_transfer_sub(struct libusb20_device *pdev, uint8_t endpoint) break; case 1: sxfer = libusb20_tr_get_priv_sc1(pxfer0); + if (sxfer == NULL) + return; /* cancelling */ if (sxfer->rem_len) return; /* cannot queue another one */ /* swap transfers */ @@ -1229,12 +1237,18 @@ libusb_submit_transfer(struct libusb_transfer *uxfer) if (pxfer0 == NULL || pxfer1 == NULL) { err = LIBUSB_ERROR_OTHER; } else if ((sxfer->entry.tqe_prev != NULL) || - (libusb20_tr_get_priv_sc1(pxfer0) == sxfer) || + (libusb20_tr_get_priv_sc1(pxfer0) == sxfer) || (libusb20_tr_get_priv_sc1(pxfer1) == sxfer)) { err = LIBUSB_ERROR_BUSY; } else { + + /* set pending state */ + sxfer->state = LIBUSB_SUPER_XFER_ST_PEND; + + /* insert transfer into transfer head list */ TAILQ_INSERT_TAIL(&dev->tr_head, sxfer, entry); + /* start work transfers */ libusb10_submit_transfer_sub( uxfer->dev_handle, endpoint); @@ -1258,12 +1272,14 @@ libusb_cancel_transfer(struct libusb_transfer *uxfer) struct libusb_super_transfer *sxfer; struct libusb_device *dev; uint32_t endpoint; + int retval; if (uxfer == NULL) return (LIBUSB_ERROR_INVALID_PARAM); + /* check if not initialised */ if (uxfer->dev_handle == NULL) - return (LIBUSB_ERROR_INVALID_PARAM); + return (LIBUSB_ERROR_NOT_FOUND); endpoint = uxfer->endpoint; @@ -1277,39 +1293,50 @@ libusb_cancel_transfer(struct libusb_transfer *uxfer) sxfer = (struct libusb_super_transfer *)( (uint8_t *)uxfer - sizeof(*sxfer)); + retval = 0; + CTX_LOCK(dev->ctx); pxfer0 = libusb10_get_transfer(uxfer->dev_handle, endpoint, 0); pxfer1 = libusb10_get_transfer(uxfer->dev_handle, endpoint, 1); - if (sxfer->entry.tqe_prev != NULL) { + if (sxfer->state != LIBUSB_SUPER_XFER_ST_PEND) { + /* only update the transfer status */ + uxfer->status = LIBUSB_TRANSFER_CANCELLED; + retval = LIBUSB_ERROR_NOT_FOUND; + } else if (sxfer->entry.tqe_prev != NULL) { /* we are lucky - transfer is on a queue */ TAILQ_REMOVE(&dev->tr_head, sxfer, entry); sxfer->entry.tqe_prev = NULL; - libusb10_complete_transfer(NULL, sxfer, LIBUSB_TRANSFER_CANCELLED); + libusb10_complete_transfer(NULL, + sxfer, LIBUSB_TRANSFER_CANCELLED); } else if (pxfer0 == NULL || pxfer1 == NULL) { /* not started */ + retval = LIBUSB_ERROR_NOT_FOUND; } else if (libusb20_tr_get_priv_sc1(pxfer0) == sxfer) { - libusb10_complete_transfer(pxfer0, sxfer, LIBUSB_TRANSFER_CANCELLED); + 
libusb10_complete_transfer(pxfer0, + sxfer, LIBUSB_TRANSFER_CANCELLED); libusb20_tr_stop(pxfer0); /* make sure the queue doesn't stall */ libusb10_submit_transfer_sub( uxfer->dev_handle, endpoint); } else if (libusb20_tr_get_priv_sc1(pxfer1) == sxfer) { - libusb10_complete_transfer(pxfer1, sxfer, LIBUSB_TRANSFER_CANCELLED); + libusb10_complete_transfer(pxfer1, + sxfer, LIBUSB_TRANSFER_CANCELLED); libusb20_tr_stop(pxfer1); /* make sure the queue doesn't stall */ libusb10_submit_transfer_sub( uxfer->dev_handle, endpoint); } else { /* not started */ + retval = LIBUSB_ERROR_NOT_FOUND; } CTX_UNLOCK(dev->ctx); DPRINTF(dev->ctx, LIBUSB_DEBUG_FUNCTION, "libusb_cancel_transfer leave"); - return (0); + return (retval); } UNEXPORTED void diff --git a/lib/libusb/libusb10.h b/lib/libusb/libusb10.h index 338c2aa..d2a2bd7 100644 --- a/lib/libusb/libusb10.h +++ b/lib/libusb/libusb10.h @@ -65,7 +65,9 @@ struct libusb_super_transfer { uint8_t *curr_data; uint32_t rem_len; uint32_t last_len; - uint8_t flags; + uint8_t state; +#define LIBUSB_SUPER_XFER_ST_NONE 0 +#define LIBUSB_SUPER_XFER_ST_PEND 1 }; struct libusb_context { diff --git a/lib/libusb/libusb20.3 b/lib/libusb/libusb20.3 index f902883..5404013 100644 --- a/lib/libusb/libusb20.3 +++ b/lib/libusb/libusb20.3 @@ -26,7 +26,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 22, 2009 +.Dd November 18, 2009 .Dt LIBUSB20 3 .Os .Sh NAME @@ -98,6 +98,8 @@ USB access library (libusb -lusb) .Fn libusb20_tr_setup_intr "struct libusb20_transfer *xfer" "void *pbuf" "uint32_t length" "uint32_t timeout" .Ft void .Fn libusb20_tr_setup_isoc "struct libusb20_transfer *xfer" "void *pbuf" "uint32_t length" "uint61_t fr_index" +.Ft uint8_t +.Fn libusb20_tr_bulk_intr_sync "struct libusb20_transfer *xfer" "void *pbuf" "uint32_t length" "uint32_t *pactlen" "uint32_t timeout" .Ft void .Fn libusb20_tr_start "struct libusb20_transfer *xfer" .Ft void @@ -451,6 +453,29 @@ is a helper function for setting up a multi frame USB ISOCHRONOUS transfer. . .Pp . +.Fn libusb20_tr_bulk_intr_sync +will perform a synchronous BULK or INTERRUPT transfer having length given by the +.Fa length +argument and buffer pointer given by the +.Fa pbuf +argument on the USB transfer given by the +.Fa xfer +argument. +. +If the +.Fa pactlen +argument is non-NULL the actual transfer length will be stored at the given pointer destination. +. +If the +.Fa timeout +argument is non-zero the transfer will timeout after the given value in milliseconds. +. +This function does not change the transfer flags, like short packet not ok. +. +This function returns zero on success else a LIBUSB20_TRANSFER_XXX value is returned. +. +.Pp +. .Fn libusb20_tr_start will get the USB transfer started, if not already started. diff --git a/lib/libusb/libusb20.c b/lib/libusb/libusb20.c index 1a33805..41f9ac7 100644 --- a/lib/libusb/libusb20.c +++ b/lib/libusb/libusb20.c @@ -1,6 +1,6 @@ /* $FreeBSD$ */ /*- - * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. + * Copyright (c) 2008-2009 Hans Petter Selasky. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -263,6 +263,10 @@ libusb20_tr_get_priv_sc1(struct libusb20_transfer *xfer) void libusb20_tr_stop(struct libusb20_transfer *xfer) { + if (!xfer->is_opened) { + /* transfer is not opened */ + return; + } if (!xfer->is_pending) { /* transfer not pending */ return; @@ -280,6 +284,10 @@ libusb20_tr_stop(struct libusb20_transfer *xfer) void libusb20_tr_drain(struct libusb20_transfer *xfer) { + if (!xfer->is_opened) { + /* transfer is not opened */ + return; + } /* make sure that we are cancelling */ libusb20_tr_stop(xfer); @@ -415,9 +423,79 @@ libusb20_tr_setup_isoc(struct libusb20_transfer *xfer, void *pBuf, uint32_t leng return; } +uint8_t +libusb20_tr_bulk_intr_sync(struct libusb20_transfer *xfer, + void *pbuf, uint32_t length, uint32_t *pactlen, + uint32_t timeout) +{ + struct libusb20_device *pdev = xfer->pdev; + uint32_t transfer_max; + uint32_t transfer_act; + uint8_t retval; + + /* set some sensible default value */ + if (pactlen != NULL) + *pactlen = 0; + + /* check for error condition */ + if (libusb20_tr_pending(xfer)) + return (LIBUSB20_ERROR_OTHER); + + do { + /* compute maximum transfer length */ + transfer_max = + libusb20_tr_get_max_total_length(xfer); + + if (transfer_max > length) + transfer_max = length; + + /* setup bulk or interrupt transfer */ + libusb20_tr_setup_bulk(xfer, pbuf, + transfer_max, timeout); + + /* start the transfer */ + libusb20_tr_start(xfer); + + /* wait for transfer completion */ + while (libusb20_dev_process(pdev) == 0) { + + if (libusb20_tr_pending(xfer) == 0) + break; + + libusb20_dev_wait_process(pdev, -1); + } + + transfer_act = libusb20_tr_get_actual_length(xfer); + + /* update actual length, if any */ + if (pactlen != NULL) + pactlen[0] += transfer_act; + + /* check transfer status */ + retval = libusb20_tr_get_status(xfer); + if (retval) + break; + + /* check for short transfer */ + if (transfer_act != transfer_max) + break; + + /* update buffer pointer and length */ + pbuf = ((uint8_t *)pbuf) + transfer_max; + length = length - transfer_max; + + } while (length != 0); + + return (retval); +} + void libusb20_tr_submit(struct libusb20_transfer *xfer) { + if (!xfer->is_opened) { + /* transfer is not opened */ + return; + } if (xfer->is_pending) { /* should not happen */ return; @@ -433,6 +511,10 @@ libusb20_tr_submit(struct libusb20_transfer *xfer) void libusb20_tr_start(struct libusb20_transfer *xfer) { + if (!xfer->is_opened) { + /* transfer is not opened */ + return; + } if (xfer->is_pending) { if (xfer->is_cancel) { /* cancelling - restart */ @@ -461,7 +543,14 @@ libusb20_dev_close(struct libusb20_device *pdev) for (x = 0; x != pdev->nTransfer; x++) { xfer = pdev->pTransfer + x; + if (!xfer->is_opened) { + /* transfer is not opened */ + continue; + } + libusb20_tr_drain(xfer); + + libusb20_tr_close(xfer); } if (pdev->pTransfer != NULL) { diff --git a/lib/libusb/libusb20.h b/lib/libusb/libusb20.h index 9f45861..bb7d8a4 100644 --- a/lib/libusb/libusb20.h +++ b/lib/libusb/libusb20.h @@ -1,6 +1,6 @@ /* $FreeBSD$ */ /*- - * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. + * Copyright (c) 2008-2009 Hans Petter Selasky. All rights reserved. * Copyright (c) 2007-2008 Daniel Drake. All rights reserved. * Copyright (c) 2001 Johannes Erdfelt. All rights reserved. 
* @@ -226,6 +226,7 @@ void libusb20_tr_setup_bulk(struct libusb20_transfer *xfer, void *pbuf, uint32_t void libusb20_tr_setup_control(struct libusb20_transfer *xfer, void *psetup, void *pbuf, uint32_t timeout); void libusb20_tr_setup_intr(struct libusb20_transfer *xfer, void *pbuf, uint32_t length, uint32_t timeout); void libusb20_tr_setup_isoc(struct libusb20_transfer *xfer, void *pbuf, uint32_t length, uint16_t fr_index); +uint8_t libusb20_tr_bulk_intr_sync(struct libusb20_transfer *xfer, void *pbuf, uint32_t length, uint32_t *pactlen, uint32_t timeout); void libusb20_tr_start(struct libusb20_transfer *xfer); void libusb20_tr_stop(struct libusb20_transfer *xfer); void libusb20_tr_submit(struct libusb20_transfer *xfer); -- cgit v1.1 From fe35bb371035da80542ae632759a31caac101e0a Mon Sep 17 00:00:00 2001 From: dougb Date: Fri, 11 Dec 2009 01:23:58 +0000 Subject: MFC r199958: Update to BIND 9.6.1-P2. The vulnerability this is designed to fix is related to DNSSEC validation on a resolving name server that allows access to untrusted users. If your system does not fall into all 3 of these categories you do not need to update immediately. --- lib/bind/config.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/bind/config.h b/lib/bind/config.h index 8e52426..1d68450 100644 --- a/lib/bind/config.h +++ b/lib/bind/config.h @@ -277,6 +277,10 @@ int sigwait(const unsigned int *set, int *sig); /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + /* Defined if extern char *optarg is not declared. */ /* #undef NEED_OPTARG */ -- cgit v1.1 From 64dfd453c6caa9e0e21d382f621b8b6a17d25ae8 Mon Sep 17 00:00:00 2001 From: rwatson Date: Mon, 14 Dec 2009 13:13:43 +0000 Subject: Merge r197841 from head to stable/8: Add a new errno, ENOTCAPABLE, to be returned when a process requests an operation on a file descriptor that is not authorized by the descriptor's capability flags. Sponsored by: Google --- lib/libc/gen/errlst.c | 1 + lib/libc/sys/intro.2 | 3 +++ 2 files changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/libc/gen/errlst.c b/lib/libc/gen/errlst.c index bc3e0c9..db6bfa2 100644 --- a/lib/libc/gen/errlst.c +++ b/lib/libc/gen/errlst.c @@ -150,5 +150,6 @@ const char *const sys_errlist[] = { "Multihop attempted", /* 90 - EMULTIHOP */ "Link has been severed", /* 91 - ENOLINK */ "Protocol error", /* 92 - EPROTO */ + "Capabilities insufficient", /* 93 - ENOTCAPABLE */ }; const int sys_nerr = sizeof(sys_errlist) / sizeof(sys_errlist[0]); diff --git a/lib/libc/sys/intro.2 b/lib/libc/sys/intro.2 index 7338f8b..1138483 100644 --- a/lib/libc/sys/intro.2 +++ b/lib/libc/sys/intro.2 @@ -456,6 +456,9 @@ The specified extended attribute does not exist. .It Er 88 EDOOFUS Em "Programming error" . A function or API is being abused in a way which could only be detected at run-time. +.It Er 93 ENOTCAPABLE Em "Capabilities insufficient" . +An operation on a capability file descriptor requires greater privilege than +the capability allows. .El .Sh DEFINITIONS .Bl -tag -width Ds -- cgit v1.1 From 0e440163ad12fb2b7cce130c8d5ed6eb4b566c3c Mon Sep 17 00:00:00 2001 From: marcel Date: Thu, 17 Dec 2009 02:06:07 +0000 Subject: MFC rev 200498: Work-around a race condition on ia64 while unlocking a contested lock. 
--- lib/libthr/thread/thr_umtx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib')

diff --git a/lib/libthr/thread/thr_umtx.c b/lib/libthr/thread/thr_umtx.c
index 7c59bb7..5af923d 100644
--- a/lib/libthr/thread/thr_umtx.c
+++ b/lib/libthr/thread/thr_umtx.c
@@ -112,10 +112,13 @@ __thr_umutex_timedlock(struct umutex *mtx, uint32_t id,
 int
 __thr_umutex_unlock(struct umutex *mtx, uint32_t id)
 {
+#ifndef __ia64__
+	/* XXX this logic has a race-condition on ia64. */
 	if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) {
 		atomic_cmpset_rel_32(&mtx->m_owner, id | UMUTEX_CONTESTED,
 			UMUTEX_CONTESTED);
 		return _umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE, 0, 0, 0);
 	}
+#endif /* __ia64__ */
 	return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0);
 }
-- cgit v1.1

From bde46f0c425a67a0bcb9d266dfff41b9b315ee5d Mon Sep 17 00:00:00 2001
From: kib
Date: Thu, 17 Dec 2009 18:56:52 +0000
Subject: MFC r199826:

sigset() is the name of a function specified by SUSv4.
Rename the local variable to avoid the conflict.
---
 lib/libutil/pw_util.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/libutil/pw_util.c b/lib/libutil/pw_util.c
index 69232fb..75459e3 100644
--- a/lib/libutil/pw_util.c
+++ b/lib/libutil/pw_util.c
@@ -289,7 +289,7 @@ int
 pw_edit(int notsetuid)
 {
 	struct sigaction sa, sa_int, sa_quit;
-	sigset_t oldsigset, sigset;
+	sigset_t oldsigset, nsigset;
 	struct stat st1, st2;
 	const char *editor;
 	int pstat;
@@ -303,9 +303,9 @@ pw_edit(int notsetuid)
 	sa.sa_flags = 0;
 	sigaction(SIGINT, &sa, &sa_int);
 	sigaction(SIGQUIT, &sa, &sa_quit);
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGCHLD);
-	sigprocmask(SIG_BLOCK, &sigset, &oldsigset);
+	sigemptyset(&nsigset);
+	sigaddset(&nsigset, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &nsigset, &oldsigset);
 	switch ((editpid = fork())) {
 	case -1:
 		return (-1);
-- cgit v1.1

From a840b236b48e169ae967870732dcf792adb9acb5 Mon Sep 17 00:00:00 2001
From: jhb
Date: Thu, 17 Dec 2009 20:41:27 +0000
Subject: MFC 199606, 199614:

Add an internal _once() method.  This works identically to pthread_once(3)
with the additional property that it is safe for routines in libc to use
in both single-threaded and multi-threaded processes.  Multi-threaded
processes use the pthread_once() implementation from the threading library
while single-threaded processes use a simplified "stub" version internal
to libc.
---
 lib/libc/gen/Makefile.inc       |  3 +-
 lib/libc/gen/_once_stub.c       | 64 +++++++++++++++++++++++++++++++++++++++++
 lib/libc/include/libc_private.h |  7 +++++
 3 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 lib/libc/gen/_once_stub.c

(limited to 'lib')

diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
index b06f846..903fa22 100644
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -5,7 +5,8 @@
 .PATH: ${.CURDIR}/${MACHINE_ARCH}/gen ${.CURDIR}/gen
 
 SRCS+= __getosreldate.c __xuname.c \
-	_pthread_stubs.c _rand48.c _spinlock_stub.c _thread_init.c \
+	_once_stub.c _pthread_stubs.c _rand48.c _spinlock_stub.c \
+	_thread_init.c \
 	alarm.c arc4random.c assert.c basename.c check_utility_compat.c \
 	clock.c closedir.c confstr.c \
 	crypt.c ctermid.c daemon.c devname.c dirname.c disklabel.c \
diff --git a/lib/libc/gen/_once_stub.c b/lib/libc/gen/_once_stub.c
new file mode 100644
index 0000000..d2acc29
--- /dev/null
+++ b/lib/libc/gen/_once_stub.c
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include "un-namespace.h" +#include "libc_private.h" + +/* This implements pthread_once() for the single-threaded case. */ +static int +_libc_once(pthread_once_t *once_control, void (*init_routine)(void)) +{ + + if (once_control->state == PTHREAD_DONE_INIT) + return (0); + init_routine(); + once_control->state = PTHREAD_DONE_INIT; + return (0); +} + +/* + * This is the internal interface provided to libc. It will use + * pthread_once() from the threading library in a multi-threaded + * process and _libc_once() for a single-threaded library. Because + * _libc_once() uses the same ABI for the values in the pthread_once_t + * structure as the threading library, it is safe for a process to + * switch from _libc_once() to pthread_once() when threading is + * enabled. + */ +int +_once(pthread_once_t *once_control, void (*init_routine)(void)) +{ + + if (__isthreaded) + return (_pthread_once(once_control, init_routine)); + return (_libc_once(once_control, init_routine)); +} diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h index 50903f3..052eb13 100644 --- a/lib/libc/include/libc_private.h +++ b/lib/libc/include/libc_private.h @@ -34,6 +34,7 @@ #ifndef _LIBC_PRIVATE_H_ #define _LIBC_PRIVATE_H_ +#include /* * This global flag is non-zero when a process has created one @@ -147,6 +148,12 @@ int _yp_check(char **); void _init_tls(void); /* + * Provides pthread_once()-like functionality for both single-threaded + * and multi-threaded applications. + */ +int _once(pthread_once_t *, void (*)(void)); + +/* * Set the TLS thread pointer */ void _set_tp(void *tp); -- cgit v1.1 From c55271bb4d1df908a79cd7e9a0f7f8839eac2d45 Mon Sep 17 00:00:00 2001 From: brooks Date: Fri, 18 Dec 2009 06:09:43 +0000 Subject: MFC r200366 Add a missing else that negated the truncation of ki_ngroups to NGROUPS. 
Submitted by: Dmitry Pryanishnikov --- lib/libkvm/kvm_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libkvm/kvm_proc.c b/lib/libkvm/kvm_proc.c index 071f859..f8f8483 100644 --- a/lib/libkvm/kvm_proc.c +++ b/lib/libkvm/kvm_proc.c @@ -149,7 +149,7 @@ kvm_proclist(kd, what, arg, p, bp, maxcnt) if (ucred.cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; - } + } else kp->ki_ngroups = ucred.cr_ngroups; kvm_read(kd, (u_long)ucred.cr_groups, kp->ki_groups, kp->ki_ngroups * sizeof(gid_t)); -- cgit v1.1 From 099d1bfda5e6c17840e60f4ec0be795da67d3585 Mon Sep 17 00:00:00 2001 From: scf Date: Fri, 18 Dec 2009 20:05:10 +0000 Subject: Merge from head to stable/8: r200423: Remove a dead store. --- lib/libutil/gr_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libutil/gr_util.c b/lib/libutil/gr_util.c index 77e0653..633f435 100644 --- a/lib/libutil/gr_util.c +++ b/lib/libutil/gr_util.c @@ -117,8 +117,8 @@ gr_make(const struct group *gr) /* Create the group line and fill it. */ if ((line = malloc(line_size)) == NULL) return (NULL); - line_size = snprintf(line, line_size, group_line_format, gr->gr_name, - gr->gr_passwd, (uintmax_t)gr->gr_gid); + snprintf(line, line_size, group_line_format, gr->gr_name, gr->gr_passwd, + (uintmax_t)gr->gr_gid); if (gr->gr_mem != NULL) for (ndx = 0; gr->gr_mem[ndx] != NULL; ndx++) { strcat(line, gr->gr_mem[ndx]); -- cgit v1.1 From 176e26b324e1868e4bf61c5bb3a7a64eb7ad048b Mon Sep 17 00:00:00 2001 From: kib Date: Sat, 19 Dec 2009 11:47:00 +0000 Subject: MFC r198508, r198509: Reimplement pselect() in kernel, making change of sigmask and sleep atomic. MFC r198538: Move pselect(3) man page to section 2. 
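The atomicity is the whole point: with plain select(2), a signal can be
delivered after the application unblocks it but before the sleep begins,
and is then not noticed until the next event.  A consumer sketch of the
blocked-except-while-sleeping idiom:

    #include <sys/select.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static volatile sig_atomic_t got_sigint;

    static void
    handler(int sig)
    {
            (void)sig;
            got_sigint = 1;
    }

    int
    main(void)
    {
            struct sigaction sa;
            sigset_t block, waitmask;
            fd_set rfds;

            sa.sa_handler = handler;
            sigemptyset(&sa.sa_mask);
            sa.sa_flags = 0;  /* no SA_RESTART; let pselect() see EINTR */
            sigaction(SIGINT, &sa, NULL);

            /* Keep SIGINT blocked except while actually sleeping. */
            sigemptyset(&block);
            sigaddset(&block, SIGINT);
            sigprocmask(SIG_BLOCK, &block, &waitmask);
            sigdelset(&waitmask, SIGINT);

            for (;;) {
                    FD_ZERO(&rfds);
                    FD_SET(STDIN_FILENO, &rfds);
                    /* Mask swap and sleep happen atomically in the kernel. */
                    if (pselect(STDIN_FILENO + 1, &rfds, NULL, NULL, NULL,
                        &waitmask) < 0) {
                            if (got_sigint) {
                                    printf("got SIGINT\n");
                                    break;
                            }
                            continue;  /* interrupted by some other signal */
                    }
                    if (FD_ISSET(STDIN_FILENO, &rfds))
                            break;     /* stdin is readable */
            }
            return (0);
    }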
--- lib/libc/gen/Makefile.inc | 4 +- lib/libc/gen/Symbol.map | 2 - lib/libc/gen/pselect.3 | 127 --------------------------------------- lib/libc/gen/pselect.c | 78 ------------------------ lib/libc/sys/Makefile.inc | 3 +- lib/libc/sys/Symbol.map | 3 + lib/libc/sys/pselect.2 | 122 +++++++++++++++++++++++++++++++++++++ lib/libthr/thread/thr_syscalls.c | 4 +- 8 files changed, 132 insertions(+), 211 deletions(-) delete mode 100644 lib/libc/gen/pselect.3 delete mode 100644 lib/libc/gen/pselect.c create mode 100644 lib/libc/sys/pselect.2 (limited to 'lib') diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 903fa22..d04dd75 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -22,7 +22,7 @@ SRCS+= __getosreldate.c __xuname.c \ initgroups.c isatty.c isinf.c isnan.c jrand48.c lcong48.c \ lockf.c lrand48.c mrand48.c nftw.c nice.c \ nlist.c nrand48.c opendir.c \ - pause.c pmadvise.c popen.c posix_spawn.c pselect.c \ + pause.c pmadvise.c popen.c posix_spawn.c \ psignal.c pw_scan.c pwcache.c \ raise.c readdir.c readpassphrase.c rewinddir.c \ scandir.c seed48.c seekdir.c sem.c semctl.c \ @@ -63,7 +63,7 @@ MAN+= alarm.3 arc4random.3 \ posix_spawnattr_getpgroup.3 posix_spawnattr_getschedparam.3 \ posix_spawnattr_getschedpolicy.3 posix_spawnattr_init.3 \ posix_spawnattr_getsigdefault.3 posix_spawnattr_getsigmask.3 \ - pselect.3 psignal.3 pwcache.3 \ + psignal.3 pwcache.3 \ raise.3 rand48.3 readpassphrase.3 rfork_thread.3 \ scandir.3 sem_destroy.3 sem_getvalue.3 sem_init.3 \ sem_open.3 sem_post.3 sem_timedwait.3 sem_wait.3 \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 197430a..4c19a3d 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -223,7 +223,6 @@ FBSD_1.0 { posix_madvise; popen; pclose; - pselect; psignal; raise; readdir; @@ -453,7 +452,6 @@ FBSDprivate_1.0 { __opendir2; __pause; _pause; - __pselect; __pw_scan; /* Used by (at least) libutil */ __raise; _raise; diff --git a/lib/libc/gen/pselect.3 b/lib/libc/gen/pselect.3 deleted file mode 100644 index a56a0a8..0000000 --- a/lib/libc/gen/pselect.3 +++ /dev/null @@ -1,127 +0,0 @@ -.\" -.\" Copyright 2002 Massachusetts Institute of Technology -.\" -.\" Permission to use, copy, modify, and distribute this software and -.\" its documentation for any purpose and without fee is hereby -.\" granted, provided that both the above copyright notice and this -.\" permission notice appear in all copies, that both the above -.\" copyright notice and this permission notice appear in all -.\" supporting documentation, and that the name of M.I.T. not be used -.\" in advertising or publicity pertaining to distribution of the -.\" software without specific, written prior permission. M.I.T. makes -.\" no representations about the suitability of this software for any -.\" purpose. It is provided "as is" without express or implied -.\" warranty. -.\" -.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS -.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, -.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT -.\" SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd June 16, 2002 -.Dt PSELECT 3 -.Os -.Sh NAME -.Nm pselect -.Nd synchronous I/O multiplexing a la POSIX.1g -.Sh LIBRARY -.Lb libc -.Sh SYNOPSIS -.In sys/select.h -.Ft int -.Fo pselect -.Fa "int nfds" -.Fa "fd_set * restrict readfds" -.Fa "fd_set * restrict writefds" -.Fa "fd_set * restrict exceptfds" -.Fa "const struct timespec * restrict timeout" -.Fa "const sigset_t * restrict newsigmask" -.Fc -.Sh DESCRIPTION -The -.Fn pselect -function was introduced by -.St -p1003.1g-2000 -as a slightly stronger version of -.Xr select 2 . -The -.Fa nfds , readfds , writefds , -and -.Fa exceptfds -arguments are all identical to the analogous arguments of -.Fn select . -The -.Fa timeout -argument in -.Fn pselect -points to a -.Vt "const struct timespec" -rather than the (modifiable) -.Vt "struct timeval" -used by -.Fn select ; -as in -.Fn select , -a null pointer may be passed to indicate that -.Fn pselect -should wait indefinitely. -Finally, -.Fa newsigmask -specifies a signal mask which is set while waiting for input. -When -.Fn pselect -returns, the original signal mask is restored. -.Pp -See -.Xr select 2 -for a more detailed discussion of the semantics of this interface, and -for macros used to manipulate the -.Vt "fd_set" -data type. -.Sh IMPLEMENTATION NOTES -The -.Fn pselect -function is implemented in the C library as a wrapper around -.Fn select . -.Sh RETURN VALUES -The -.Fn pselect -function returns the same values and under the same conditions as -.Fn select . -.Sh ERRORS -The -.Fn pselect -function may fail for any of the reasons documented for -.Xr select 2 -and (if a signal mask is provided) -.Xr sigprocmask 2 . -.Sh SEE ALSO -.Xr kqueue 2 , -.Xr poll 2 , -.Xr select 2 , -.Xr sigprocmask 2 -.Sh STANDARDS -The -.Fn pselect -function conforms to -.St -p1003.1-2001 . -.Sh HISTORY -The -.Fn pselect -function first appeared in -.Fx 5.0 . -.Sh AUTHORS -The -.Fn pselect -function and this manual page were written by -.An Garrett Wollman Aq wollman@FreeBSD.org . diff --git a/lib/libc/gen/pselect.c b/lib/libc/gen/pselect.c deleted file mode 100644 index 28066a2..0000000 --- a/lib/libc/gen/pselect.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2000 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. 
DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "namespace.h" -#include -#include - -#include -#include -#include "un-namespace.h" - -__weak_reference(__pselect, pselect); - -/* - * Emulate the POSIX 1003.1g-2000 `pselect' interface. This is the - * same as the traditional BSD `select' function, except that it uses - * a timespec rather than a timeval, doesn't modify the timeout argument, - * and allows the user to specify a signal mask to apply during the select. - */ -int -__pselect(int count, fd_set * __restrict rfds, fd_set * __restrict wfds, - fd_set * __restrict efds, const struct timespec * __restrict timo, - const sigset_t * __restrict mask) -{ - sigset_t omask; - struct timeval tvtimo, *tvp; - int rv, sverrno; - - if (timo) { - TIMESPEC_TO_TIMEVAL(&tvtimo, timo); - tvp = &tvtimo; - } else - tvp = 0; - - if (mask != 0) { - rv = _sigprocmask(SIG_SETMASK, mask, &omask); - if (rv != 0) - return rv; - } - - rv = _select(count, rfds, wfds, efds, tvp); - if (mask != 0) { - sverrno = errno; - _sigprocmask(SIG_SETMASK, &omask, (sigset_t *)0); - errno = sverrno; - } - - return rv; -} diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 1e6059d..1915c55 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -83,7 +83,8 @@ MAN+= abort2.2 accept.2 access.2 acct.2 adjtime.2 \ mq_setattr.2 \ msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \ msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \ - pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 ptrace.2 quotactl.2 \ + pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \ + pselect.2 ptrace.2 quotactl.2 \ read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \ rtprio.2 .if !defined(NO_P1003_1B) diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 56d8aaa..7ff42da 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -211,6 +211,7 @@ FBSD_1.0 { posix_openpt; preadv; profil; + pselect; ptrace; pwritev; quotactl; @@ -779,6 +780,8 @@ FBSDprivate_1.0 { __sys_preadv; _profil; __sys_profil; + _pselect; + __sys_pselect; _ptrace; __sys_ptrace; _pwritev; diff --git a/lib/libc/sys/pselect.2 b/lib/libc/sys/pselect.2 new file mode 100644 index 0000000..cf784a5 --- /dev/null +++ b/lib/libc/sys/pselect.2 @@ -0,0 +1,122 @@ +.\" +.\" Copyright 2002 Massachusetts Institute of Technology +.\" +.\" Permission to use, copy, modify, and distribute this software and +.\" its documentation for any purpose and without fee is hereby +.\" granted, provided that both the above copyright notice and this +.\" permission notice appear in all copies, that both the above +.\" copyright notice and this permission notice appear in all +.\" supporting documentation, and that the name of M.I.T. 
not be used +.\" in advertising or publicity pertaining to distribution of the +.\" software without specific, written prior permission. M.I.T. makes +.\" no representations about the suitability of this software for any +.\" purpose. It is provided "as is" without express or implied +.\" warranty. +.\" +.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS +.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, +.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT +.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd October 27, 2009 +.Dt PSELECT 2 +.Os +.Sh NAME +.Nm pselect +.Nd synchronous I/O multiplexing a la POSIX.1g +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/select.h +.Ft int +.Fo pselect +.Fa "int nfds" +.Fa "fd_set * restrict readfds" +.Fa "fd_set * restrict writefds" +.Fa "fd_set * restrict exceptfds" +.Fa "const struct timespec * restrict timeout" +.Fa "const sigset_t * restrict newsigmask" +.Fc +.Sh DESCRIPTION +The +.Fn pselect +function was introduced by +.St -p1003.1g-2000 +as a slightly stronger version of +.Xr select 2 . +The +.Fa nfds , readfds , writefds , +and +.Fa exceptfds +arguments are all identical to the analogous arguments of +.Fn select . +The +.Fa timeout +argument in +.Fn pselect +points to a +.Vt "const struct timespec" +rather than the (modifiable) +.Vt "struct timeval" +used by +.Fn select ; +as in +.Fn select , +a null pointer may be passed to indicate that +.Fn pselect +should wait indefinitely. +Finally, +.Fa newsigmask +specifies a signal mask which is set while waiting for input. +When +.Fn pselect +returns, the original signal mask is restored. +.Pp +See +.Xr select 2 +for a more detailed discussion of the semantics of this interface, and +for macros used to manipulate the +.Vt "fd_set" +data type. +.Sh RETURN VALUES +The +.Fn pselect +function returns the same values and under the same conditions as +.Fn select . +.Sh ERRORS +The +.Fn pselect +function may fail for any of the reasons documented for +.Xr select 2 +and (if a signal mask is provided) +.Xr sigprocmask 2 . +.Sh SEE ALSO +.Xr kqueue 2 , +.Xr poll 2 , +.Xr select 2 , +.Xr sigprocmask 2 +.Sh STANDARDS +The +.Fn pselect +function conforms to +.St -p1003.1-2001 . +.Sh HISTORY +The +.Fn pselect +function first appeared in +.Fx 5.0 . +.Sh AUTHORS +The first implementation of +.Fn pselect +function and this manual page were written by +.An Garrett Wollman Aq wollman@FreeBSD.org . 
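[Illustration, not part of the committed diff.] The reason pselect(2) is moved to a true
system call rather than the sigprocmask()/select() emulation deleted above is atomicity:
the emulation sets the mask, calls select(), and restores the mask as three separate
steps, leaving a window in which a signal can be delivered and lost. A minimal sketch of
the pattern the atomic version enables, assuming a SIGCHLD handler that only sets a flag:

    #include <sys/select.h>
    #include <signal.h>

    static volatile sig_atomic_t got_sigchld;

    static void
    on_sigchld(int sig)
    {
            (void)sig;
            got_sigchld = 1;
    }

    int
    wait_readable(int fd)
    {
            sigset_t blocked, orig;
            fd_set rfds;

            signal(SIGCHLD, on_sigchld);
            sigemptyset(&blocked);
            sigaddset(&blocked, SIGCHLD);
            /*
             * SIGCHLD is held from here on; it can only be delivered
             * inside pselect(), where unblocking and waiting are one
             * atomic operation.  A sigprocmask()+select() emulation
             * leaves a gap in which the signal would be missed.
             */
            sigprocmask(SIG_BLOCK, &blocked, &orig);
            for (;;) {
                    if (got_sigchld) {
                            got_sigchld = 0;
                            /* reap children here */
                    }
                    FD_ZERO(&rfds);
                    FD_SET(fd, &rfds);
                    if (pselect(fd + 1, &rfds, NULL, NULL, NULL, &orig) > 0)
                            return (0);     /* fd is readable */
            }
    }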
diff --git a/lib/libthr/thread/thr_syscalls.c b/lib/libthr/thread/thr_syscalls.c index 698c0c3..31af6fa 100644 --- a/lib/libthr/thread/thr_syscalls.c +++ b/lib/libthr/thread/thr_syscalls.c @@ -104,6 +104,8 @@ extern int __sys_accept(int, struct sockaddr *, socklen_t *); extern int __sys_connect(int, const struct sockaddr *, socklen_t); extern int __sys_fsync(int); extern int __sys_msync(void *, size_t, int); +extern int __sys_pselect(int, fd_set *, fd_set *, fd_set *, + const struct timespec *, const sigset_t *); extern int __sys_poll(struct pollfd *, unsigned, int); extern ssize_t __sys_recv(int, void *, size_t, int); extern ssize_t __sys_recvfrom(int, void *, size_t, int, struct sockaddr *, socklen_t *); @@ -366,7 +368,7 @@ ___pselect(int count, fd_set *rfds, fd_set *wfds, fd_set *efds, int ret; _thr_cancel_enter(curthread); - ret = __pselect(count, rfds, wfds, efds, timo, mask); + ret = __sys_pselect(count, rfds, wfds, efds, timo, mask); _thr_cancel_leave(curthread); return (ret); -- cgit v1.1 From 18b2bd4c02f62a2b46ccb3eb6aa402d7f2368f40 Mon Sep 17 00:00:00 2001 From: attilio Date: Sat, 19 Dec 2009 19:22:09 +0000 Subject: MFC r199801: Fix a socket leak. Sponsored by: Sandvine Incorporated --- lib/libfetch/ftp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libfetch/ftp.c b/lib/libfetch/ftp.c index d0d597f..a0ba510 100644 --- a/lib/libfetch/ftp.c +++ b/lib/libfetch/ftp.c @@ -1122,17 +1122,19 @@ ftp_request(struct url *url, const char *op, struct url_stat *us, /* change directory */ if (ftp_cwd(conn, url->doc) == -1) - return (NULL); + goto errsock; /* stat file */ if (us && ftp_stat(conn, url->doc, us) == -1 && fetchLastErrCode != FETCH_PROTO && fetchLastErrCode != FETCH_UNAVAIL) - return (NULL); + goto errsock; /* just a stat */ - if (strcmp(op, "STAT") == 0) + if (strcmp(op, "STAT") == 0) { + ftp_disconnect(conn); return (FILE *)1; /* bogus return value */ + } if (strcmp(op, "STOR") == 0 || strcmp(op, "APPE") == 0) oflag = O_WRONLY; else @@ -1140,6 +1142,10 @@ ftp_request(struct url *url, const char *op, struct url_stat *us, /* initiate the transfer */ return (ftp_transfer(conn, op, url->doc, oflag, url->offset, flags)); + +errsock: + ftp_disconnect(conn); + return (NULL); } /* -- cgit v1.1 From ed4791ae105cf5fe3f9807963e17806048d19686 Mon Sep 17 00:00:00 2001 From: attilio Date: Sat, 19 Dec 2009 19:23:25 +0000 Subject: MFC r199802: Fix handling of empty attributes. Sponsored by: Sandvine Incorporated --- lib/libtacplus/taclib.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libtacplus/taclib.c b/lib/libtacplus/taclib.c index 6ac3c72..a44e834 100644 --- a/lib/libtacplus/taclib.c +++ b/lib/libtacplus/taclib.c @@ -1263,8 +1263,13 @@ tac_get_av_value(struct tac_handle *h, const char *attribute) * h->srvr_avs[0] = "foobie=var1" * h->srvr_avs[1] = "foo=var2" * is handled. + * + * Note that for empty string attribute values a + * 0-length string is returned in order to distinguish + * against unset values. + * dump_str() will handle srvr.len == 0 correctly. */ - if (found_seperator == 1 && ch != end) { + if (found_seperator == 1) { srvr.len = end - ch; srvr.data = ch; return dup_str(h, &srvr, NULL); -- cgit v1.1 From 436b85c22382d16f0163e9e7bbc2ba2faa79e532 Mon Sep 17 00:00:00 2001 From: jamie Date: Sun, 20 Dec 2009 04:49:29 +0000 Subject: MFC r200623: Add a null pointer check so "name" can be used as a key parameter in jailparam_get. 
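A reduced illustration of the failure mode this change guards against (the struct and
function names below are stand-ins, not libjail's): using a jail parameter as the lookup
key dereferences its value, so an unset parameter whose value pointer is NULL must be
rejected before the dereference.

    #include <stddef.h>

    struct kv {                     /* stand-in for struct jailparam */
            void    *jp_value;
            size_t   jp_valuelen;
    };

    static int
    usable_as_key(const struct kv *p)
    {
            /*
             * The jp_value != NULL test is the added check: without it,
             * an unset parameter is dereferenced instead of letting the
             * lookup fall back to the name key.
             */
            return (p->jp_valuelen == sizeof(int) &&
                p->jp_value != NULL && *(const int *)p->jp_value != 0);
    }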
PR: bin/141692 Submitted by: delphij --- lib/libjail/jail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c index 9411b88..39c88d0 100644 --- a/lib/libjail/jail.c +++ b/lib/libjail/jail.c @@ -532,7 +532,7 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags) } jp_key = jp_lastjid ? jp_lastjid : jp_jid && jp_jid->jp_valuelen == sizeof(int) && - *(int *)jp_jid->jp_value ? jp_jid : jp_name; + jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name; if (jp_key == NULL || jp_key->jp_value == NULL) { strlcpy(jail_errmsg, "no jail specified", JAIL_ERRMSGLEN); errno = ENOENT; -- cgit v1.1 From defe73f9cee2bf25885efe6f31614f4947dcdeca Mon Sep 17 00:00:00 2001 From: scf Date: Sun, 20 Dec 2009 18:57:43 +0000 Subject: Merge from head to stable/8: r200424: Fix libusb_open_device_with_vid_pid() to return a NULL if no device is found instead of the last device in its search list. Reviewed by: thompsa --- lib/libusb/libusb10.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libusb/libusb10.c b/lib/libusb/libusb10.c index 9a5154e..25520d2 100644 --- a/lib/libusb/libusb10.c +++ b/lib/libusb/libusb10.c @@ -379,8 +379,6 @@ libusb_open_device_with_vid_pid(libusb_context *ctx, uint16_t vendor_id, if ((i = libusb_get_device_list(ctx, &devs)) < 0) return (NULL); - pdev = NULL; - for (j = 0; j < i; j++) { pdev = devs[j]->os_priv; pdesc = libusb20_dev_get_device_desc(pdev); @@ -396,6 +394,8 @@ libusb_open_device_with_vid_pid(libusb_context *ctx, uint16_t vendor_id, break; } } + if (j == i) + pdev = NULL; libusb_free_device_list(devs, 1); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_open_device_width_vid_pid leave"); -- cgit v1.1 From 745ca4f2c407635e1681257b0f599c231a47b48b Mon Sep 17 00:00:00 2001 From: jilles Date: Tue, 22 Dec 2009 13:45:29 +0000 Subject: MFC r200589: cpuset(2): fix a typo and a markup error in the man page --- lib/libc/sys/cpuset.2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/cpuset.2 b/lib/libc/sys/cpuset.2 index c07f8d9..fb9a770 100644 --- a/lib/libc/sys/cpuset.2 +++ b/lib/libc/sys/cpuset.2 @@ -96,7 +96,7 @@ The .Fa which argument may have the following values: .Bl -column CPU_WHICH_CPUSET -offset indent -.It Dv CPU_WHICH_TID Ta "id is lpwid_t (thread id)" +.It Dv CPU_WHICH_TID Ta "id is lwpid_t (thread id)" .It Dv CPU_WHICH_PID Ta "id is pid_t (process id)" .It Dv CPU_WHICH_CPUSET Ta "id is a cpusetid_t (cpuset id)" .It Dv CPU_WHICH_IRQ Ta "id is an irq number" @@ -209,7 +209,7 @@ The calling process did not have the credentials required to complete the operation. .It Bq Er ENFILE There was no free -.Fn cpusetid_t +.Ft cpusetid_t for allocation. .El .Sh SEE ALSO -- cgit v1.1 From d74cd5eae6166cb23de1cf9489b847350cd30f7e Mon Sep 17 00:00:00 2001 From: roam Date: Thu, 24 Dec 2009 20:35:01 +0000 Subject: MFC r199244: Fix the grammar in the isgraph(3) description, almost as per the PR. PR: 140455 Submitted by: Jeremy Huddleston --- lib/libc/locale/isgraph.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/locale/isgraph.3 b/lib/libc/locale/isgraph.3 index 4ba78e3..5e06e2e 100644 --- a/lib/libc/locale/isgraph.3 +++ b/lib/libc/locale/isgraph.3 @@ -50,7 +50,7 @@ The function tests for any printing character except space .Pq Ql "\ " and other -locale specific space-like characters. +locale-specific space-like characters. 
The value of the argument must be representable as an .Vt "unsigned char" or the value of -- cgit v1.1 From 7ae7e0a261d5953e772cc84a9c78e469c005ba50 Mon Sep 17 00:00:00 2001 From: delphij Date: Sun, 27 Dec 2009 07:04:27 +0000 Subject: Explicitly say that this is an internal library which is intended to be used within FreeBSD base system only, and discourage user applications from using it. User applications should use the expat version from the ports/package collection. Reviewed by: simon (earlier version) --- lib/libexpat/libbsdxml.3 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libexpat/libbsdxml.3 b/lib/libexpat/libbsdxml.3 index 7d7b0e7..f687518 100644 --- a/lib/libexpat/libbsdxml.3 +++ b/lib/libexpat/libbsdxml.3 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\"/ -.Dd May 5, 2008 +.Dd December 12, 2009 .Dt LIBBSDXML 3 .Os .Sh NAME @@ -38,6 +38,15 @@ The .Nm library is a verbatim copy of the eXpat XML library version 2.0.1. .Pp +The +.Nm +library is intended to use within the +.Fx +base system only. +Use of the +.Nm +library for other purposes is not supported and discouraged. +.Pp To avoid version and autoconfiguration issues, the library has been renamed to .Nm -- cgit v1.1 From 983a441bd6cee6cbccd5c4982ba22e6982f24e2c Mon Sep 17 00:00:00 2001 From: kib Date: Sun, 27 Dec 2009 20:39:58 +0000 Subject: MFC r200038: Properly support -fPIE by linking PIE binaries with specially-built Scrt1.o instead of crt1.o, since the later is built as non-PIC. Separate i386-elf crt1.c into the pure assembler part and C code, supplying all data extracted by assembler stub as explicit parameters. Hide and localize _start1 symbol used as an interface between asm and C code. --- lib/csu/amd64/Makefile | 5 +- lib/csu/arm/Makefile | 5 +- lib/csu/i386-elf/Makefile | 24 ++++++++-- lib/csu/i386-elf/crt1.c | 113 ---------------------------------------------- lib/csu/i386-elf/crt1_c.c | 95 ++++++++++++++++++++++++++++++++++++++ lib/csu/i386-elf/crt1_s.S | 44 ++++++++++++++++++ lib/csu/ia64/Makefile | 5 +- lib/csu/mips/Makefile | 5 +- lib/csu/powerpc/Makefile | 5 +- lib/csu/sparc64/Makefile | 5 +- 10 files changed, 182 insertions(+), 124 deletions(-) delete mode 100644 lib/csu/i386-elf/crt1.c create mode 100644 lib/csu/i386-elf/crt1_c.c create mode 100644 lib/csu/i386-elf/crt1_s.S (limited to 'lib') diff --git a/lib/csu/amd64/Makefile b/lib/csu/amd64/Makefile index 71ccd67..1a74efc 100644 --- a/lib/csu/amd64/Makefile +++ b/lib/csu/amd64/Makefile @@ -4,7 +4,7 @@ SRCS= crt1.c crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} -OBJS+= gcrt1.o +OBJS+= Scrt1.o gcrt1.o CFLAGS+= -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include CFLAGS+= -fno-omit-frame-pointer @@ -16,6 +16,9 @@ CLEANFILES= ${OBJS} gcrt1.o: crt1.c ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.CURDIR}/crt1.c +Scrt1.o: crt1.c + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1.o ${.CURDIR}/crt1.c + realinstall: ${INSTALL} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${OBJS} ${DESTDIR}${LIBDIR} diff --git a/lib/csu/arm/Makefile b/lib/csu/arm/Makefile index 23954e8..097f82d 100644 --- a/lib/csu/arm/Makefile +++ b/lib/csu/arm/Makefile @@ -4,7 +4,7 @@ SRCS= crt1.c crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} -OBJS+= gcrt1.o +OBJS+= Scrt1.o gcrt1.o CFLAGS+= -Wall -Wno-unused \ -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include @@ -16,6 +16,9 @@ CLEANFILES= ${OBJS} gcrt1.o: crt1.c ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.ALLSRC} +Scrt1.o: crt1.c + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1.o ${.ALLSRC} + realinstall: ${INSTALL} -o 
${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${OBJS} ${DESTDIR}${LIBDIR} diff --git a/lib/csu/i386-elf/Makefile b/lib/csu/i386-elf/Makefile index 8598ce8..c2af118 100644 --- a/lib/csu/i386-elf/Makefile +++ b/lib/csu/i386-elf/Makefile @@ -2,8 +2,8 @@ .PATH: ${.CURDIR}/../common -SRCS= crt1.c crti.S crtn.S -FILES= ${SRCS:N*.h:R:S/$/.o/g} gcrt1.o +SRCS= crti.S crtn.S +FILES= ${SRCS:N*.h:R:S/$/.o/g} gcrt1.o crt1.o Scrt1.o FILESOWN= ${LIBOWN} FILESGRP= ${LIBGRP} FILESMODE= ${LIBMODE} @@ -11,9 +11,23 @@ FILESDIR= ${LIBDIR} WARNS?= 6 CFLAGS+= -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include -CLEANFILES= ${FILES} +CLEANFILES= ${FILES} crt1_c.o crt1_s.o gcrt1_c.o Scrt1_c.o -gcrt1.o: crt1.c - ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.CURDIR}/crt1.c +gcrt1_c.o: crt1_c.c + ${CC} ${CFLAGS} -DGCRT -c -o gcrt1_c.o ${.CURDIR}/crt1_c.c + +gcrt1.o: gcrt1_c.o crt1_s.o + ${LD} ${LDFLAGS} -o gcrt1.o -r crt1_s.o gcrt1_c.o + +crt1.o: crt1_c.o crt1_s.o + ${LD} ${LDFLAGS} -o crt1.o -r crt1_s.o crt1_c.o + objcopy --localize-symbol _start1 crt1.o + +Scrt1_c.o: crt1_c.c + ${CC} ${CFLAGS} -DGCRT -fPIC -DPIC -c -o Scrt1_c.o ${.CURDIR}/crt1_c.c + +Scrt1.o: Scrt1_c.o crt1_s.o + ${LD} ${LDFLAGS} -o Scrt1.o -r crt1_s.o Scrt1_c.o + objcopy --localize-symbol _start1 Scrt1.o .include diff --git a/lib/csu/i386-elf/crt1.c b/lib/csu/i386-elf/crt1.c deleted file mode 100644 index 0934333..0000000 --- a/lib/csu/i386-elf/crt1.c +++ /dev/null @@ -1,113 +0,0 @@ -/* LINTLIBRARY */ -/*- - * Copyright 1996-1998 John D. Polstra. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef lint -#ifndef __GNUC__ -#error "GCC is needed to compile this file" -#endif -#endif /* lint */ - -#include - -#include "libc_private.h" -#include "crtbrand.c" - -extern int _DYNAMIC; -#pragma weak _DYNAMIC - -typedef void (*fptr)(void); - -extern void _fini(void); -extern void _init(void); -extern int main(int, char **, char **); -extern void _start(char *, ...); - -#ifdef GCRT -extern void _mcleanup(void); -extern void monstartup(void *, void *); -extern int eprol; -extern int etext; -#endif - -char **environ; -const char *__progname = ""; - -static __inline fptr -get_rtld_cleanup(void) -{ - fptr retval; - -#ifdef __GNUC__ - __asm__("movl %%edx,%0" : "=rm"(retval)); -#else - retval = (fptr)0; /* XXXX Fix this for other compilers */ -#endif - return(retval); -} - -/* The entry function. */ -void -_start(char *ap, ...) -{ - fptr cleanup; - int argc; - char **argv; - char **env; - const char *s; - -#ifdef __GNUC__ - __asm__("and $0xfffffff0,%esp"); -#endif - cleanup = get_rtld_cleanup(); - argv = ≈ - argc = *(long *)(void *)(argv - 1); - env = argv + argc + 1; - environ = env; - if (argc > 0 && argv[0] != NULL) { - __progname = argv[0]; - for (s = __progname; *s != '\0'; s++) - if (*s == '/') - __progname = s + 1; - } - - if (&_DYNAMIC != NULL) - atexit(cleanup); - else - _init_tls(); - -#ifdef GCRT - atexit(_mcleanup); -#endif - atexit(_fini); -#ifdef GCRT - monstartup(&eprol, &etext); -__asm__("eprol:"); -#endif - _init(); - exit( main(argc, argv, env) ); -} - -__asm__(".ident\t\"$FreeBSD$\""); diff --git a/lib/csu/i386-elf/crt1_c.c b/lib/csu/i386-elf/crt1_c.c new file mode 100644 index 0000000..c38f267 --- /dev/null +++ b/lib/csu/i386-elf/crt1_c.c @@ -0,0 +1,95 @@ +/* LINTLIBRARY */ +/*- + * Copyright 1996-1998 John D. Polstra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef lint +#ifndef __GNUC__ +#error "GCC is needed to compile this file" +#endif +#endif /* lint */ + +#include + +#include "libc_private.h" +#include "crtbrand.c" + +extern int _DYNAMIC; +#pragma weak _DYNAMIC + +typedef void (*fptr)(void); + +extern void _fini(void); +extern void _init(void); +extern int main(int, char **, char **); +extern void _start(char *, ...); + +#ifdef GCRT +extern void _mcleanup(void); +extern void monstartup(void *, void *); +extern int eprol; +extern int etext; +#endif + +char **environ; +const char *__progname = ""; + +void _start1(fptr, int, char *[]) __dead2; + +/* The entry function, C part. */ +void +_start1(fptr cleanup, int argc, char *argv[]) +{ + char **env; + const char *s; + + env = argv + argc + 1; + environ = env; + if (argc > 0 && argv[0] != NULL) { + __progname = argv[0]; + for (s = __progname; *s != '\0'; s++) + if (*s == '/') + __progname = s + 1; + } + + if (&_DYNAMIC != NULL) + atexit(cleanup); + else + _init_tls(); + +#ifdef GCRT + atexit(_mcleanup); +#endif + atexit(_fini); +#ifdef GCRT + monstartup(&eprol, &etext); +__asm__("eprol:"); +#endif + _init(); + exit( main(argc, argv, env) ); +} + +__asm(".hidden _start1"); diff --git a/lib/csu/i386-elf/crt1_s.S b/lib/csu/i386-elf/crt1_s.S new file mode 100644 index 0000000..2af8a1b --- /dev/null +++ b/lib/csu/i386-elf/crt1_s.S @@ -0,0 +1,44 @@ +/*- + * Copyright 2009 Konstantin Belousov. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + + + .text + .align 4 + .globl _start + .type _start, @function +_start: xorl %ebp,%ebp + pushl %ebp + movl %esp,%ebp + andl $0xfffffff0,%esp # align stack + leal 8(%ebp),%eax + pushl %eax # argv + pushl 4(%ebp) # argc + pushl %edx # rtld cleanup + call _start1 + .size _start, . 
- _start + + .ident "$FreeBSD$" diff --git a/lib/csu/ia64/Makefile b/lib/csu/ia64/Makefile index c906c09..d795103 100644 --- a/lib/csu/ia64/Makefile +++ b/lib/csu/ia64/Makefile @@ -4,7 +4,7 @@ SRCS= crt1.S crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} -OBJS+= gcrt1.o +OBJS+= Scrt1.o gcrt1.o CFLAGS+= -Wall -Wno-unused \ -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include @@ -16,6 +16,9 @@ CLEANFILES= ${OBJS} gcrt1.o: crt1.S ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.ALLSRC} +Scrt1.o: crt1.S + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1.o ${.ALLSRC} + realinstall: ${INSTALL} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${OBJS} ${DESTDIR}${LIBDIR} diff --git a/lib/csu/mips/Makefile b/lib/csu/mips/Makefile index 23954e8..097f82d 100644 --- a/lib/csu/mips/Makefile +++ b/lib/csu/mips/Makefile @@ -4,7 +4,7 @@ SRCS= crt1.c crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} -OBJS+= gcrt1.o +OBJS+= Scrt1.o gcrt1.o CFLAGS+= -Wall -Wno-unused \ -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include @@ -16,6 +16,9 @@ CLEANFILES= ${OBJS} gcrt1.o: crt1.c ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.ALLSRC} +Scrt1.o: crt1.c + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1.o ${.ALLSRC} + realinstall: ${INSTALL} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${OBJS} ${DESTDIR}${LIBDIR} diff --git a/lib/csu/powerpc/Makefile b/lib/csu/powerpc/Makefile index 23954e8..097f82d 100644 --- a/lib/csu/powerpc/Makefile +++ b/lib/csu/powerpc/Makefile @@ -4,7 +4,7 @@ SRCS= crt1.c crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} -OBJS+= gcrt1.o +OBJS+= Scrt1.o gcrt1.o CFLAGS+= -Wall -Wno-unused \ -I${.CURDIR}/../common \ -I${.CURDIR}/../../libc/include @@ -16,6 +16,9 @@ CLEANFILES= ${OBJS} gcrt1.o: crt1.c ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.ALLSRC} +Scrt1.o: crt1.c + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1.o ${.ALLSRC} + realinstall: ${INSTALL} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${OBJS} ${DESTDIR}${LIBDIR} diff --git a/lib/csu/sparc64/Makefile b/lib/csu/sparc64/Makefile index 2e1d03f..7f8dd7a 100644 --- a/lib/csu/sparc64/Makefile +++ b/lib/csu/sparc64/Makefile @@ -4,7 +4,7 @@ SRCS= crt1.c crti.S crtn.S OBJS= ${SRCS:N*.h:R:S/$/.o/g} -OBJS+= gcrt1.o +OBJS+= Scrt1.o gcrt1.o CFLAGS+= -I${.CURDIR}/../common -I${.CURDIR}/../../libc/include all: ${OBJS} @@ -14,6 +14,9 @@ CLEANFILES= ${OBJS} gcrt1.o: crt1.c ${CC} ${CFLAGS} -DGCRT -c -o gcrt1.o ${.ALLSRC} +Scrt1.o: crt1.c + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1.o ${.ALLSRC} + realinstall: ${INSTALL} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${OBJS} ${DESTDIR}${LIBDIR} -- cgit v1.1 From 145c1109517ba8110c826af4a7bfcc913d13e413 Mon Sep 17 00:00:00 2001 From: edwin Date: Tue, 29 Dec 2009 10:05:20 +0000 Subject: MFC of r200832, tzdata2009t zic: - Fix URL / reference to Calendrical Calculations: Third Edition libc/stdtime: - Fix typo in tzfile.5 (no changes in our part) --- lib/libc/stdtime/tzfile.5 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/stdtime/tzfile.5 b/lib/libc/stdtime/tzfile.5 index 15625d2..1606b5a 100644 --- a/lib/libc/stdtime/tzfile.5 +++ b/lib/libc/stdtime/tzfile.5 @@ -147,6 +147,6 @@ such instants). .Xr ctime 3 , .Xr time2posix 3 , .Xr zic 8 -.\" @(#)tzfile.5 8.2 +.\" @(#)tzfile.5 8.3 .\" This file is in the public domain, so clarified as of .\" 1996-06-05 by Arthur David Olson. -- cgit v1.1 From 367048d38ba2f1d8fe8787015b16a496acc2b8e6 Mon Sep 17 00:00:00 2001 From: markus Date: Wed, 30 Dec 2009 16:37:58 +0000 Subject: MFC r200992: Use a local copy of entry_d for finding matches. 
Otherwise, if entry_d pointed to an entry of 'acl', all ACL entries starting with entry_d would be deleted. Approved by: emax (mentor) --- lib/libc/posix1e/acl_delete_entry.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/posix1e/acl_delete_entry.c b/lib/libc/posix1e/acl_delete_entry.c index 7dd60b8..09b4507 100644 --- a/lib/libc/posix1e/acl_delete_entry.c +++ b/lib/libc/posix1e/acl_delete_entry.c @@ -75,6 +75,7 @@ int acl_delete_entry(acl_t acl, acl_entry_t entry_d) { struct acl *acl_int; + struct acl_entry entry_int; int i, j, found = 0; if (acl == NULL || entry_d == NULL) { @@ -94,8 +95,12 @@ acl_delete_entry(acl_t acl, acl_entry_t entry_d) errno = EINVAL; return (-1); } + + /* Use a local copy to prevent deletion of more than this entry */ + entry_int = *entry_d; + for (i = 0; i < acl->ats_acl.acl_cnt;) { - if (_entry_matches(&(acl->ats_acl.acl_entry[i]), entry_d)) { + if (_entry_matches(&(acl->ats_acl.acl_entry[i]), &entry_int)) { /* ...shift the remaining entries... */ for (j = i; j < acl->ats_acl.acl_cnt - 1; ++j) acl->ats_acl.acl_entry[j] = -- cgit v1.1 From 10dd6a21bc57f7d7d159a278e7724d882e3edcb5 Mon Sep 17 00:00:00 2001 From: jh Date: Wed, 30 Dec 2009 17:22:00 +0000 Subject: MFC r199844: Reset path name back to original correctly in fts_build() when FTS_NOCHDIR option is used. fts_build() could strip a trailing slash from path name in post-order visit if a path pointing to an empty directory was given for fts_open(). PR: bin/133907, kern/134513 Approved by: trasz (mentor) --- lib/libc/gen/fts.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/fts.c b/lib/libc/gen/fts.c index 41443c5..88f1a23 100644 --- a/lib/libc/gen/fts.c +++ b/lib/libc/gen/fts.c @@ -836,11 +836,8 @@ mem1: saved_errno = errno; * If not changing directories, reset the path back to original * state. */ - if (ISSET(FTS_NOCHDIR)) { - if (len == sp->fts_pathlen || nitems == 0) - --cp; - *cp = '\0'; - } + if (ISSET(FTS_NOCHDIR)) + sp->fts_path[cur->fts_pathlen] = '\0'; /* * If descended after called from fts_children or after called from -- cgit v1.1 From 0bf920218abf0c03d6f39a336eca5ff879d5c093 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 30 Dec 2009 17:53:07 +0000 Subject: MFC 200919: Fix a bug in gzipfs that prevented lseek() from working and add lseek() support to bzip2fs. This fixes problems with loading compressed amd64 kernel modules containing debug symbols. --- lib/libstand/bzipfs.c | 67 +++++++++++++++++++++++++++++++++++++++++++++------ lib/libstand/gzipfs.c | 16 ++++++------ 2 files changed, 67 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/libstand/bzipfs.c b/lib/libstand/bzipfs.c index 47c799f..46a151b 100644 --- a/lib/libstand/bzipfs.c +++ b/lib/libstand/bzipfs.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #ifndef REGRESSION #include "stand.h" #else +#include #include #include #include @@ -42,7 +43,7 @@ struct open_file { }; #define F_READ 0x0001 /* file opened for reading */ #define EOFFSET (ELAST+8) /* relative seek not supported */ -static inline u_int min(u_int a, u_int b) { return (a < b ? a : b); } +static inline u_int min(u_int a, u_int b) { return(a < b ? 
a : b); } #define panic(x, y) abort() #endif @@ -174,6 +175,8 @@ bzf_open(const char *fname, struct open_file *f) /* Construct new name */ bzfname = malloc(strlen(fname) + 5); + if (bzfname == NULL) + return(ENOMEM); sprintf(bzfname, "%s.bz2", fname); /* Try to open the compressed datafile */ @@ -195,13 +198,14 @@ bzf_open(const char *fname, struct open_file *f) /* Allocate a bz_file structure, populate it */ bzf = malloc(sizeof(struct bz_file)); + if (bzf == NULL) + return(ENOMEM); bzero(bzf, sizeof(struct bz_file)); bzf->bzf_rawfd = rawfd; - /* Verify that the file is bzipped (XXX why do this afterwards?) */ + /* Verify that the file is bzipped */ if (check_header(bzf)) { close(bzf->bzf_rawfd); - BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); free(bzf); return(EFTYPE); } @@ -247,7 +251,7 @@ bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid) if (bzf->bzf_bzstream.avail_in == 0) { /* oops, unexpected EOF */ printf("bzf_read: unexpected EOF\n"); if (bzf->bzf_bzstream.avail_out == size) - return (EIO); + return(EIO); break; } @@ -266,6 +270,50 @@ bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid) return(0); } +static int +bzf_rewind(struct open_file *f) +{ + struct bz_file *bzf = (struct bz_file *)f->f_fsdata; + struct bz_file *bzf_tmp; + + /* + * Since bzip2 does not have an equivalent inflateReset function a crude + * one needs to be provided. The functions all called in such a way that + * at any time an error occurs a role back can be done (effectively making + * this rewind 'atomic', either the reset occurs successfully or not at all, + * with no 'undefined' state happening). + */ + + /* Allocate a bz_file structure, populate it */ + bzf_tmp = malloc(sizeof(struct bz_file)); + if (bzf_tmp == NULL) + return(-1); + bzero(bzf_tmp, sizeof(struct bz_file)); + bzf_tmp->bzf_rawfd = bzf->bzf_rawfd; + + /* Initialise the inflation engine */ + if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) { + free(bzf_tmp); + return(-1); + } + + /* Seek back to the beginning of the file */ + if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) { + BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream)); + free(bzf_tmp); + return(-1); + } + + /* Free old bz_file data */ + BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); + free(bzf); + + /* Use the new bz_file data */ + f->f_fsdata = bzf_tmp; + + return(0); +} + static off_t bzf_seek(struct open_file *f, off_t offset, int where) { @@ -284,14 +332,17 @@ bzf_seek(struct open_file *f, off_t offset, int where) target = -1; default: errno = EINVAL; - return (-1); + return(-1); } /* Can we get there from here? */ - if (target < bzf->bzf_bzstream.total_out_lo32) { + if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) { errno = EOFFSET; return -1; - } + } + + /* if bzf_rewind was called then bzf has changed */ + bzf = (struct bz_file *)f->f_fsdata; /* skip forwards if required */ while (target > bzf->bzf_bzstream.total_out_lo32) { @@ -301,7 +352,7 @@ bzf_seek(struct open_file *f, off_t offset, int where) return(-1); } /* This is where we are (be honest if we overshot) */ - return (bzf->bzf_bzstream.total_out_lo32); + return(bzf->bzf_bzstream.total_out_lo32); } static int diff --git a/lib/libstand/gzipfs.c b/lib/libstand/gzipfs.c index 4f40a4a..9ff7985 100644 --- a/lib/libstand/gzipfs.c +++ b/lib/libstand/gzipfs.c @@ -212,10 +212,9 @@ zf_open(const char *fname, struct open_file *f) bzero(zf, sizeof(struct z_file)); zf->zf_rawfd = rawfd; - /* Verify that the file is gzipped (XXX why do this afterwards?) 
*/ + /* Verify that the file is gzipped */ if (check_header(zf)) { close(zf->zf_rawfd); - inflateEnd(&(zf->zf_zstream)); free(zf); return(EFTYPE); } @@ -261,7 +260,7 @@ zf_read(struct open_file *f, void *buf, size_t size, size_t *resid) if (zf->zf_zstream.avail_in == 0) { /* oops, unexpected EOF */ printf("zf_read: unexpected EOF\n"); if (zf->zf_zstream.avail_out == size) - return (EIO); + return(EIO); break; } @@ -286,12 +285,13 @@ zf_rewind(struct open_file *f) struct z_file *zf = (struct z_file *)f->f_fsdata; if (lseek(zf->zf_rawfd, zf->zf_dataoffset, SEEK_SET) == -1) - return -1; + return(-1); zf->zf_zstream.avail_in = 0; zf->zf_zstream.next_in = NULL; + zf->zf_endseen = 0; (void)inflateReset(&zf->zf_zstream); - return 0; + return(0); } static off_t @@ -312,12 +312,12 @@ zf_seek(struct open_file *f, off_t offset, int where) target = -1; default: errno = EINVAL; - return (-1); + return(-1); } /* rewind if required */ if (target < zf->zf_zstream.total_out && zf_rewind(f) != 0) - return -1; + return(-1); /* skip forwards if required */ while (target > zf->zf_zstream.total_out) { @@ -327,7 +327,7 @@ zf_seek(struct open_file *f, off_t offset, int where) return(-1); } /* This is where we are (be honest if we overshot) */ - return (zf->zf_zstream.total_out); + return(zf->zf_zstream.total_out); } -- cgit v1.1 From b587b2f652c48043cb99be49dcdeadfac5852d06 Mon Sep 17 00:00:00 2001 From: kib Date: Fri, 1 Jan 2010 11:43:09 +0000 Subject: MFC r201201: Document CLOCK_SECOND, add cross-reference from time(3) to clock_gettime(2). MFC r201204: Document _FAST and _PRECISE clocks. --- lib/libc/gen/time.3 | 1 + lib/libc/sys/clock_gettime.2 | 55 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/time.3 b/lib/libc/gen/time.3 index 2770e49..d022e06 100644 --- a/lib/libc/gen/time.3 +++ b/lib/libc/gen/time.3 @@ -66,6 +66,7 @@ The function may fail for any of the reasons described in .Xr gettimeofday 2 . .Sh SEE ALSO +.Xr clock_gettime 2 , .Xr gettimeofday 2 , .Xr ctime 3 .Sh STANDARDS diff --git a/lib/libc/sys/clock_gettime.2 b/lib/libc/sys/clock_gettime.2 index 242af1c..a2fa624 100644 --- a/lib/libc/sys/clock_gettime.2 +++ b/lib/libc/sys/clock_gettime.2 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 4, 2006 +.Dd December 29, 2009 .Dt CLOCK_GETTIME 2 .Os .Sh NAME @@ -59,21 +59,43 @@ used by a clock which is specified by The .Fa clock_id argument -can be one of five values: -.Dv CLOCK_REALTIME +can be one of the following values: +.Dv CLOCK_REALTIME , +.Dv CLOCK_REALTIME_PRECISE , +.Dv CLOCK_REALTIME_FAST for time that increments as -a wall clock should, -.Dv CLOCK_MONOTONIC -which increments in SI seconds, -.Dv CLOCK_UPTIME +a wall clock should; +.Dv CLOCK_MONOTONIC , +.Dv CLOCK_MONOTONIC_PRECISE , +.Dv CLOCK_MONOTONIC_FAST +which increments in SI seconds; +.Dv CLOCK_UPTIME , +.Dv CLOCK_UPTIME_PRECISE , +.Dv CLOCK_UPTIME_FAST which starts at zero when the kernel boots and increments -monotonically in SI seconds while the machine is running, +monotonically in SI seconds while the machine is running; .Dv CLOCK_VIRTUAL for time that increments only when -the CPU is running in user mode on behalf of the calling process, or +the CPU is running in user mode on behalf of the calling process; .Dv CLOCK_PROF for time that increments when the CPU is running in user or -kernel mode. 
+kernel mode; or +.Dv CLOCK_SECOND +which returns the current second without performing a full time counter +query, using in-kernel cached value of current second. +.Pp +The clock IDs +.Fa CLOCK_REALTIME_FAST , +.Fa CLOCK_MONOTONIC_FAST , +.Fa CLOCK_UPTIME_FAST +are analogs of corresponding IDs without _FAST suffix but do not perform +a full time counter query, so their accuracy is one timer tick. +Similarly, +.Fa CLOCK_REALTIME_PRECISE , +.Fa CLOCK_MONOTONIC_PRECISE , +.Fa CLOCK_UPTIME_PRECISE +are used to get the most exact value as possible, at the expense of +execution time. .Pp The structure pointed to by .Fa tp @@ -88,7 +110,8 @@ struct timespec { }; .Ed .Pp -Only the super-user may set the time of day. +Only the super-user may set the time of day, using only +.Fa CLOCK_REALTIME . If the system securelevel is greater than 1 (see .Xr init 8 ) , the time may only be advanced. @@ -134,3 +157,13 @@ and .Fn clock_getres system calls conform to .St -p1003.1b-93 . +The clock IDs +.Fa CLOCK_REALTIME_FAST , +.Fa CLOCK_REALTIME_PRECISE , +.Fa CLOCK_MONOTONIC_FAST , +.Fa CLOCK_MONOTONIC_PRECISE , +.Fa CLOCK_UPTIME , +.Fa CLOCK_UPTIME_FAST , +.Fa CLOCK_UPTIME_PRECISE , +.Fa CLOCK_SECOND +are FreeBSD extensions to the POSIX interface. -- cgit v1.1 From 32f7e2fc4c57b31b1f38f481f98d31ced3c1f521 Mon Sep 17 00:00:00 2001 From: delphij Date: Mon, 4 Jan 2010 01:09:59 +0000 Subject: MFC r201137: Grammar fix. Submitted by: Kenyon Ralph --- lib/libexpat/libbsdxml.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libexpat/libbsdxml.3 b/lib/libexpat/libbsdxml.3 index f687518..555bc9a 100644 --- a/lib/libexpat/libbsdxml.3 +++ b/lib/libexpat/libbsdxml.3 @@ -40,7 +40,7 @@ library is a verbatim copy of the eXpat XML library version 2.0.1. .Pp The .Nm -library is intended to use within the +library is intended to be used within the .Fx base system only. Use of the -- cgit v1.1 From 194840d7a1fa5739749e87b5bb40df6b69dfc5d9 Mon Sep 17 00:00:00 2001 From: kib Date: Tue, 5 Jan 2010 12:32:09 +0000 Subject: MFC r201194: Use clock_gettime(CLOCK_SECOND) instead of gettimeofday(2) for implementation of time(3). CLOCK_SECOND is much faster. --- lib/libc/gen/time.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/time.c b/lib/libc/gen/time.c index 840f9b6..214dfce 100644 --- a/lib/libc/gen/time.c +++ b/lib/libc/gen/time.c @@ -37,13 +37,12 @@ __FBSDID("$FreeBSD$"); #include time_t -time(t) - time_t *t; +time(time_t *t) { - struct timeval tt; + struct timespec tt; time_t retval; - if (gettimeofday(&tt, (struct timezone *)0) < 0) + if (clock_gettime(CLOCK_SECOND, &tt) < 0) retval = -1; else retval = tt.tv_sec; -- cgit v1.1 From 03e26beaeb74ba217b53b89fa85857ab89184303 Mon Sep 17 00:00:00 2001 From: kib Date: Sun, 10 Jan 2010 11:25:34 +0000 Subject: MFC r201743: Give some information on SF_MNOWAIT flag. MFC r201759 (by brueffer): Fix a typo and bump date for the previous commit. MFC r201760: Further fix grammar. --- lib/libc/sys/sendfile.2 | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/sendfile.2 b/lib/libc/sys/sendfile.2 index 322971f..d9f8cab 100644 --- a/lib/libc/sys/sendfile.2 +++ b/lib/libc/sys/sendfile.2 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 24, 2006 +.Dd January 7, 2010 .Dt SENDFILE 2 .Os .Sh NAME @@ -116,9 +116,17 @@ Busy servers may benefit by transferring requests that would block to a separate I/O worker thread. .It .Dv SF_MNOWAIT . 
-(description missing) +Do not wait for some kernel resource to become available, +in particular, +.Vt mbuf +and +.Vt sf_buf . +The flag does not make the +.Fn sendfile +syscall truly non-blocking, since other resources are still allocated +in a blocking fashion. .It -.Dv SF_SYNC , +.Dv SF_SYNC . .Nm sleeps until the network stack no longer references the VM pages of the file, making subsequent modifications to it safe. -- cgit v1.1 From 50f4dd01c42bf5858416f8e7e567e08596d79846 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 13 Jan 2010 18:12:21 +0000 Subject: MFC 199607, 200797, 201270, 201669: Use pthread_once() to initialize the thread-local storage for localtime() and gmtime() and _once() to initialize gmt state rather than home-rolled versions using pthread mutex locks. --- lib/libc/stdtime/localtime.c | 81 ++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdtime/localtime.c b/lib/libc/stdtime/localtime.c index 9fc5f3e..bee916b 100644 --- a/lib/libc/stdtime/localtime.c +++ b/lib/libc/stdtime/localtime.c @@ -235,9 +235,14 @@ static struct state gmtmem; static char lcl_TZname[TZ_STRLEN_MAX + 1]; static int lcl_is_set; -static int gmt_is_set; +static pthread_once_t gmt_once = PTHREAD_ONCE_INIT; static pthread_rwlock_t lcl_rwlock = PTHREAD_RWLOCK_INITIALIZER; -static pthread_mutex_t gmt_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_once_t gmtime_once = PTHREAD_ONCE_INIT; +static pthread_key_t gmtime_key; +static int gmtime_key_error; +static pthread_once_t localtime_once = PTHREAD_ONCE_INIT; +static pthread_key_t localtime_key; +static int localtime_key_error; char * tzname[2] = { wildabbr, @@ -1407,27 +1412,24 @@ struct tm * const tmp; return result; } +static void +localtime_key_init(void) +{ + + localtime_key_error = _pthread_key_create(&localtime_key, free); +} + struct tm * localtime(timep) const time_t * const timep; { - static pthread_mutex_t localtime_mutex = PTHREAD_MUTEX_INITIALIZER; - static pthread_key_t localtime_key = -1; struct tm *p_tm; - int r; if (__isthreaded != 0) { - if (localtime_key < 0) { - _pthread_mutex_lock(&localtime_mutex); - if (localtime_key < 0) { - if ((r = _pthread_key_create(&localtime_key, - free)) != 0) { - _pthread_mutex_unlock(&localtime_mutex); - errno = r; - return(NULL); - } - } - _pthread_mutex_unlock(&localtime_mutex); + _pthread_once(&localtime_once, localtime_key_init); + if (localtime_key_error != 0) { + errno = localtime_key_error; + return(NULL); } p_tm = _pthread_getspecific(localtime_key); if (p_tm == NULL) { @@ -1464,6 +1466,17 @@ struct tm * tmp; return tmp; } +static void +gmt_init(void) +{ + +#ifdef ALL_STATE + gmtptr = (struct state *) malloc(sizeof *gmtptr); + if (gmtptr != NULL) +#endif /* defined ALL_STATE */ + gmtload(gmtptr); +} + /* ** gmtsub is to gmtime as localsub is to localtime. 
*/ @@ -1476,16 +1489,7 @@ struct tm * const tmp; { register struct tm * result; - _MUTEX_LOCK(&gmt_mutex); - if (!gmt_is_set) { -#ifdef ALL_STATE - gmtptr = (struct state *) malloc(sizeof *gmtptr); - if (gmtptr != NULL) -#endif /* defined ALL_STATE */ - gmtload(gmtptr); - gmt_is_set = TRUE; - } - _MUTEX_UNLOCK(&gmt_mutex); + _once(&gmt_once, gmt_init); result = timesub(timep, offset, gmtptr, tmp); #ifdef TM_ZONE /* @@ -1509,27 +1513,24 @@ struct tm * const tmp; return result; } +static void +gmtime_key_init(void) +{ + + gmtime_key_error = _pthread_key_create(&gmtime_key, free); +} + struct tm * gmtime(timep) const time_t * const timep; { - static pthread_mutex_t gmtime_mutex = PTHREAD_MUTEX_INITIALIZER; - static pthread_key_t gmtime_key = -1; struct tm *p_tm; - int r; if (__isthreaded != 0) { - if (gmtime_key < 0) { - _pthread_mutex_lock(&gmtime_mutex); - if (gmtime_key < 0) { - if ((r = _pthread_key_create(&gmtime_key, - free)) != 0) { - _pthread_mutex_unlock(&gmtime_mutex); - errno = r; - return(NULL); - } - } - _pthread_mutex_unlock(&gmtime_mutex); + _pthread_once(&gmtime_once, gmtime_key_init); + if (gmtime_key_error != 0) { + errno = gmtime_key_error; + return(NULL); } /* * Changed to follow POSIX.1 threads standard, which -- cgit v1.1 From 46ef0903053fbd1c63f5b29c8a492494b88c6fe3 Mon Sep 17 00:00:00 2001 From: brueffer Date: Fri, 15 Jan 2010 19:42:09 +0000 Subject: MFC: r201836 Remove unnecessary quoting and markup, add missing punctuation. --- lib/libc/stdio/getc.3 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/getc.3 b/lib/libc/stdio/getc.3 index c8b9386..d0f3c15 100644 --- a/lib/libc/stdio/getc.3 +++ b/lib/libc/stdio/getc.3 @@ -56,7 +56,7 @@ .Ft int .Fn getchar void .Ft int -.Fn getchar_unlocked "void" +.Fn getchar_unlocked void .Ft int .Fn getw "FILE *stream" .Sh DESCRIPTION @@ -141,7 +141,7 @@ until the condition is cleared with .Sh STANDARDS The .Fn fgetc , -.Fn getc +.Fn getc , and .Fn getchar functions @@ -167,4 +167,3 @@ The size and byte order of an varies from one machine to another, and .Fn getw is not recommended for portable applications. -.Pp -- cgit v1.1 From 3cbec0168670df0e5b4a149992b2289266d546f1 Mon Sep 17 00:00:00 2001 From: brueffer Date: Fri, 15 Jan 2010 19:53:36 +0000 Subject: MFC: r202159 Remove useless .TE groff macro. --- lib/libelf/elf.3 | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/libelf/elf.3 b/lib/libelf/elf.3 index 0727ba8..854e1af 100644 --- a/lib/libelf/elf.3 +++ b/lib/libelf/elf.3 @@ -379,7 +379,6 @@ See .It Dv SHT_SUNW_move Ta Dv ELF_T_MOVE Ta ELF move records. .It Dv SHT_SUNW_syminfo Ta Dv ELF_T_SYMINFO Ta Additional symbol flags. .El -.TE .Ss Functional Grouping This section contains a brief overview of the available functionality in the ELF library. -- cgit v1.1 From 4780cb8a56a40e9f01006416041e995fcedfcf1e Mon Sep 17 00:00:00 2001 From: marcel Date: Sat, 16 Jan 2010 04:24:10 +0000 Subject: MFC rev 201937: Implement the fo_readdir method. 
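An illustrative consumer of the new method (the caller below is hypothetical, and the
fs_ops member naming is assumed from the table initializer visible in the diff):

    #include "stand.h"

    /*
     * Walk a directory via the filesystem's readdir method; as the diff
     * shows, dos_readdir() returns ENOENT once no entries remain.
     */
    static void
    list_dir(struct fs_ops *ops, struct open_file *f)
    {
            struct dirent d;

            while (ops->fo_readdir(f, &d) == 0)
                    printf("%s\n", d.d_name);
    }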
--- lib/libstand/dosfs.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libstand/dosfs.c b/lib/libstand/dosfs.c index 161d330..5ba10c1 100644 --- a/lib/libstand/dosfs.c +++ b/lib/libstand/dosfs.c @@ -47,6 +47,7 @@ static int dos_close(struct open_file *fd); static int dos_read(struct open_file *fd, void *buf, size_t size, size_t *resid); static off_t dos_seek(struct open_file *fd, off_t offset, int whence); static int dos_stat(struct open_file *fd, struct stat *sb); +static int dos_readdir(struct open_file *fd, struct dirent *d); struct fs_ops dosfs_fsops = { "dosfs", @@ -56,7 +57,7 @@ struct fs_ops dosfs_fsops = { null_write, dos_seek, dos_stat, - null_readdir + dos_readdir }; #define SECSIZ 512 /* sector size */ @@ -354,6 +355,72 @@ dos_stat(struct open_file *fd, struct stat *sb) return (0); } +static int +dos_readdir(struct open_file *fd, struct dirent *d) +{ + DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; + u_char fn[261]; + DOS_DIR dd; + size_t res; + u_int chk, i, x, xdn; + int err; + + x = chk = 0; + while (1) { + xdn = x; + x = 0; + err = dos_read(fd, &dd, sizeof(dd), &res); + if (err) + return (err); + if (res == sizeof(dd)) + return (ENOENT); + if (dd.de.name[0] == 0) + return (ENOENT); + + /* Skip deleted entries */ + if (dd.de.name[0] == 0xe5) + continue; + + /* Skip volume labels */ + if (dd.de.attr & FA_LABEL) + continue; + + if ((dd.de.attr & FA_MASK) == FA_XDE) { + if (dd.xde.seq & 0x40) + chk = dd.xde.chk; + else if (dd.xde.seq != xdn - 1 || dd.xde.chk != chk) + continue; + x = dd.xde.seq & ~0x40; + if (x < 1 || x > 20) { + x = 0; + continue; + } + cp_xdnm(fn, &dd.xde); + } else { + if (xdn == 1) { + x = 0; + for (i = 0; i < 11; i++) { + x = ((x & 1) << 7) | (x >> 1); + x += dd.de.name[i]; + x &= 0xff; + } + if (x == chk) + break; + } else { + cp_sfn(fn, &dd.de); + break; + } + x = 0; + } + } + + d->d_fileno = dd.de.clus[1] << 8 + dd.de.clus[0]; + d->d_reclen = sizeof(*d); + d->d_type = (dd.de.attr & FA_DIR) ? DT_DIR : DT_REG; + memcpy(d->d_name, fn, sizeof(d->d_name)); + return(0); +} + /* * Parse DOS boot sector */ -- cgit v1.1 From f803d28ff49724436bacd3932eec21253761c255 Mon Sep 17 00:00:00 2001 From: thompsa Date: Sun, 17 Jan 2010 18:24:40 +0000 Subject: MFC r202025 Reset variable fields in case the transfer is opened again --- lib/libusb/libusb20.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libusb/libusb20.c b/lib/libusb/libusb20.c index 41f9ac7..1c75fc1 100644 --- a/lib/libusb/libusb20.c +++ b/lib/libusb/libusb20.c @@ -130,8 +130,19 @@ libusb20_tr_close(struct libusb20_transfer *xfer) if (xfer->ppBuffer) { free(xfer->ppBuffer); } - /* clear some fields */ + /* reset variable fields in case the transfer is opened again */ + xfer->priv_sc0 = 0; + xfer->priv_sc1 = 0; xfer->is_opened = 0; + xfer->is_pending = 0; + xfer->is_cancel = 0; + xfer->is_draining = 0; + xfer->is_restart = 0; + xfer->status = 0; + xfer->flags = 0; + xfer->nFrames = 0; + xfer->aFrames = 0; + xfer->timeout = 0; xfer->maxFrames = 0; xfer->maxTotalLength = 0; xfer->maxPacketLen = 0; -- cgit v1.1 From 8dd66f6ac2932f88ea784062f7de0a2fee3d688e Mon Sep 17 00:00:00 2001 From: brueffer Date: Tue, 19 Jan 2010 16:51:51 +0000 Subject: MFC: r201603 Fix a double free(). 
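A reduced version of the bug class (names are illustrative, not libc's): the error
branch already freed the session, and the shared failure label freed it again.
Returning directly after the first free(), as the one-line change below does, leaves
exactly one release on every path.

    #include <stdlib.h>

    struct session { int placeholder; };

    static struct session *
    session_open(int setup_ok)
    {
            struct session *sp;

            if ((sp = malloc(sizeof(*sp))) == NULL)
                    return (NULL);
            if (!setup_ok) {
                    free(sp);
                    /*
                     * Returning here is the fix; jumping to a shared
                     * cleanup label that also calls free(sp) released
                     * the same pointer twice.
                     */
                    return (NULL);
            }
            return (sp);
    }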
--- lib/libc/rpc/getnetpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/rpc/getnetpath.c b/lib/libc/rpc/getnetpath.c index 0563544..d1ea554 100644 --- a/lib/libc/rpc/getnetpath.c +++ b/lib/libc/rpc/getnetpath.c @@ -101,7 +101,7 @@ setnetpath() if ((np_sessionp->nc_handlep = setnetconfig()) == NULL) { free(np_sessionp); syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); - goto failed; + return (NULL); } np_sessionp->valid = NP_VALID; np_sessionp->ncp_list = NULL; -- cgit v1.1 From 0c4f66b0f2f83fc97fdd618b79ad08ac49bec9cd Mon Sep 17 00:00:00 2001 From: brueffer Date: Tue, 19 Jan 2010 17:09:18 +0000 Subject: MFC: r202176 Miscellaneous mdoc, spelling and inconsistency fixes. --- lib/libc/net/sctp_bindx.3 | 2 +- lib/libc/net/sctp_connectx.3 | 6 +++--- lib/libc/net/sctp_getaddrlen.3 | 2 +- lib/libc/net/sctp_getassocid.3 | 2 +- lib/libc/net/sctp_getpaddrs.3 | 6 +++--- lib/libc/net/sctp_opt_info.3 | 4 ++-- lib/libc/net/sctp_recvmsg.3 | 19 ++++++++++--------- lib/libc/net/sctp_send.3 | 2 +- lib/libc/net/sctp_sendmsg.3 | 4 ++-- lib/libc/sys/sctp_generic_recvmsg.2 | 24 ++++++++++++++---------- lib/libc/sys/sctp_generic_sendmsg.2 | 24 ++++++++++++++---------- lib/libc/sys/sctp_peeloff.2 | 4 ++-- 12 files changed, 54 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/sctp_bindx.3 b/lib/libc/net/sctp_bindx.3 index a2a2600..d047126 100644 --- a/lib/libc/net/sctp_bindx.3 +++ b/lib/libc/net/sctp_bindx.3 @@ -90,7 +90,7 @@ The call returns 0 on success and -1 upon failure. .Sh ERRORS The .Fn sctp_bindx -can return the following errors. +function can return the following errors: .Bl -tag -width Er .It Bq Er EINVAL This value is returned if the diff --git a/lib/libc/net/sctp_connectx.3 b/lib/libc/net/sctp_connectx.3 index 7b75f4a..d454bb9 100644 --- a/lib/libc/net/sctp_connectx.3 +++ b/lib/libc/net/sctp_connectx.3 @@ -44,7 +44,7 @@ .In sys/socket.h .In netinet/sctp.h .Ft int -.Fn sctp_connectx "int s" "struct sockaddr *" "int addrcnt" "sctp_assoc_t *" +.Fn sctp_connectx "int sd" "struct sockaddr *addrs" "int addrcnt" "sctp_assoc_t *id" .Sh DESCRIPTION The .Fn sctp_connectx @@ -75,7 +75,7 @@ the extra addresses sent in the call will be silently discarded from the association. On successful completion the provided -.Fa "sctp_assoc_t *" +.Fa id will be filled in with the association identification of the newly forming association. @@ -84,7 +84,7 @@ The call returns 0 on success and -1 upon failure. .Sh ERRORS The .Fn sctp_connectx -can return the following errors. +function can return the following errors: .Bl -tag -width Er .It Bq Er EINVAL An address listed has an invalid family or no diff --git a/lib/libc/net/sctp_getaddrlen.3 b/lib/libc/net/sctp_getaddrlen.3 index 325e2b0..ac7bfb3 100644 --- a/lib/libc/net/sctp_getaddrlen.3 +++ b/lib/libc/net/sctp_getaddrlen.3 @@ -76,7 +76,7 @@ system expects for the specific address family or -1. .Sh ERRORS The .Fn sctp_getaddrlen -function can return the following errors. +function can return the following errors: .Bl -tag -width Er .It Bq Er EINVAL The address family specified does NOT exist. diff --git a/lib/libc/net/sctp_getassocid.3 b/lib/libc/net/sctp_getassocid.3 index ee0926a..909b9c7 100644 --- a/lib/libc/net/sctp_getassocid.3 +++ b/lib/libc/net/sctp_getassocid.3 @@ -58,7 +58,7 @@ The call returns the association id upon success and .Sh ERRORS The .Fn sctp_getassocid -function can return the following errors. 
+function can return the following errors: .Bl -tag -width Er .It Bq Er ENOENT The address does not have an association setup to it. diff --git a/lib/libc/net/sctp_getpaddrs.3 b/lib/libc/net/sctp_getpaddrs.3 index 9f6d632..6ad5ea0 100644 --- a/lib/libc/net/sctp_getpaddrs.3 +++ b/lib/libc/net/sctp_getpaddrs.3 @@ -33,7 +33,7 @@ .\" $FreeBSD$ .\" .Dd December 15, 2006 -.Dt SCTP_GETPADDR 3 +.Dt SCTP_GETPADDRS 3 .Os .Sh NAME .Nm sctp_getpaddrs , @@ -64,7 +64,7 @@ array of socket addresses returned in the argument .Fa addrs upon success. .Pp -After the caller is through the function +After the caller is finished, the function .Fn sctp_freepaddrs or .Fn sctp_freeladdrs @@ -76,7 +76,7 @@ the number of addresses returned in .Fa addrs upon success. .Sh ERRORS -The functions can return the following errors. +The functions can return the following errors: .Bl -tag -width Er .It Bq Er EINVAL An address listed has an invalid family or no diff --git a/lib/libc/net/sctp_opt_info.3 b/lib/libc/net/sctp_opt_info.3 index 560b3e4..581a220 100644 --- a/lib/libc/net/sctp_opt_info.3 +++ b/lib/libc/net/sctp_opt_info.3 @@ -45,7 +45,7 @@ .In sys/socket.h .In netinet/sctp.h .Ft int -.Fn sctp_opt_info "int s" "sctp_assoc_t" "int opt" "void *arg" "socklen_t *size" +.Fn sctp_opt_info "int sd" "sctp_assoc_t id" "int opt" "void *arg" "socklen_t *size" .Sh DESCRIPTION The .Fn sctp_opt_info @@ -90,7 +90,7 @@ socket options. .Sh ERRORS The .Fn sctp_opt_info -function can return the following errors. +function can return the following errors: .Bl -tag -width Er .It Bq Er EINVAL The argument diff --git a/lib/libc/net/sctp_recvmsg.3 b/lib/libc/net/sctp_recvmsg.3 index b9ccfb8..f874880 100644 --- a/lib/libc/net/sctp_recvmsg.3 +++ b/lib/libc/net/sctp_recvmsg.3 @@ -160,7 +160,7 @@ struct sctp_sndrcvinfo { .Pp The .Fa sinfo->sinfo_ppid -is an opaque 32 bit value that is passed transparently +field is an opaque 32 bit value that is passed transparently through the stack from the peer endpoint. Note that the stack passes this value without regard to byte order. @@ -180,12 +180,13 @@ as soon as possible. When this flag is absent the message was delivered in order within the stream it was received. .Pp +The .Fa sinfo->sinfo_stream -is the SCTP stream that the message was received on. +field is the SCTP stream that the message was received on. Streams in SCTP are reliable (or partially reliable) flows of ordered messages. .Pp -The +The .Fa sinfo->sinfo_context field is used only if the local application set an association level context with the @@ -197,7 +198,7 @@ association. .Pp The .Fa sinfo->sinfo_ssn -will hold the stream sequence number assigned +field will hold the stream sequence number assigned by the peer endpoint if the message is .Em not unordered. @@ -205,7 +206,7 @@ For unordered messages this field holds an undefined value. .Pp The .Fa sinfo->sinfo_tsn -holds a transport sequence number (TSN) that was assigned +field holds a transport sequence number (TSN) that was assigned to this message by the peer endpoint. For messages that fit in or less than the path MTU this will be the only TSN assigned. @@ -215,12 +216,12 @@ message. .Pp The .Fa sinfo->sinfo_cumtsn -holds the current cumulative acknowledgment point of +field holds the current cumulative acknowledgment point of the transport association. Note that this may be larger or smaller than the TSN assigned to the message itself. .Pp -The +The .Fa sinfo->sinfo_assoc_id is the unique association identification that was assigned to the association. 
@@ -232,10 +233,10 @@ setting various socket options on the specific association (see .Xr sctp 4 ) . .Pp -The +The .Fa sinfo->info_timetolive field is not used by -.Fa sctp_recvmsg . +.Fn sctp_recvmsg . .Sh RETURN VALUES The call returns the number of characters sent, or -1 if an error occurred. diff --git a/lib/libc/net/sctp_send.3 b/lib/libc/net/sctp_send.3 index 2301730..f6f0c95 100644 --- a/lib/libc/net/sctp_send.3 +++ b/lib/libc/net/sctp_send.3 @@ -294,7 +294,7 @@ if an error occurred. The .Fn sctp_send system call -fail if: +fails if: .Bl -tag -width Er .It Bq Er EBADF An invalid descriptor was specified. diff --git a/lib/libc/net/sctp_sendmsg.3 b/lib/libc/net/sctp_sendmsg.3 index 61eec22..0c5fa02 100644 --- a/lib/libc/net/sctp_sendmsg.3 +++ b/lib/libc/net/sctp_sendmsg.3 @@ -271,7 +271,7 @@ if an error occurred. The .Fn sctp_sendmsg system call -fail if: +fails if: .Bl -tag -width Er .It Bq Er EBADF An invalid descriptor was specified. @@ -324,7 +324,7 @@ is not connected and is a one-to-one style socket. .Xr sendmsg 3 , .Xr sctp 4 .Sh BUGS -Because in the one-to-many style socket the +Because in the one-to-many style socket .Fn sctp_sendmsg or .Fn sctp_sendmsgx diff --git a/lib/libc/sys/sctp_generic_recvmsg.2 b/lib/libc/sys/sctp_generic_recvmsg.2 index ccdb9db..d6fd1b5 100644 --- a/lib/libc/sys/sctp_generic_recvmsg.2 +++ b/lib/libc/sys/sctp_generic_recvmsg.2 @@ -36,7 +36,7 @@ .Os .Sh NAME .Nm sctp_generic_recvmsg -.Nd receive data from a peer. +.Nd receive data from a peer .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -46,16 +46,20 @@ .Ft int .Fn sctp_generic_recvmsg "int s" "struct iovec *iov" "int iovlen" "struct sockaddr *from" "socklen_t *fromlen" "struct sctp_sndrcvinfo *sinfo" "int *msgflags" .Sh DESCRIPTION -The .Fn sctp_generic_recvmsg -is the true system calls used by the -.Fn sctp_recvmsg -function call. This call is more efficient since it is a -true system calls but it is specific to FreeBSD and -can be expected NOT to be present on any other Operating -System. For detailed useage please see either the -.Fn sctp_recvmsg -function call. +is the true system call used by the +.Xr sctp_recvmsg 3 +function call. +This call is more efficient since it is a +true system call but it is specific to +.Fx +and can be expected +.Em not +to be present on any other operating +system. +For detailed usage please see the +.Xr sctp_recvmsg 3 +function call. .Sh RETURN VALUES The call returns the number of bytes read on success and -1 upon failure. .Sh ERRORS diff --git a/lib/libc/sys/sctp_generic_sendmsg.2 b/lib/libc/sys/sctp_generic_sendmsg.2 index d7050f2..fee4211 100644 --- a/lib/libc/sys/sctp_generic_sendmsg.2 +++ b/lib/libc/sys/sctp_generic_sendmsg.2 @@ -37,7 +37,7 @@ .Sh NAME .Nm sctp_generic_sendmsg .Nm sctp_generic_sendmsg_iov -.Nd send data to a peer. +.Nd send data to a peer .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -49,21 +49,25 @@ .Ft int .Fn sctp_generic_sendmsg_iov "int s" "struct iovec *iov" "int iovlen" "struct sockaddr *to" "struct sctp_sndrcvinfo *sinfo" "int flags" .Sh DESCRIPTION -The .Fn sctp_generic_sendmsg and .Fn sctp_generic_sendmsg_iov are the true system calls used by the -.Fn sctp_sendmsg +.Xr sctp_sendmsg 3 and -.Fn sctp_send -function calls. These are more efficient since they are -true system calls but they are specific to FreeBSD and -can be expected NOT to be present on any other Operating -System. For detailed useage please see either the -.Fn sctp_send +.Xr sctp_send 3 +function calls. 
+These are more efficient since they are +true system calls but they are specific to +.Fx +and can be expected +.Em not +to be present on any other operating +system. +For detailed usage please see either the +.Xr sctp_send 3 or -.Fn sctp_sendmsg +.Xr sctp_sendmsg 3 function calls. .Sh RETURN VALUES The call returns the number of bytes written on success and -1 upon failure. diff --git a/lib/libc/sys/sctp_peeloff.2 b/lib/libc/sys/sctp_peeloff.2 index 0ac5be6..c513afa 100644 --- a/lib/libc/sys/sctp_peeloff.2 +++ b/lib/libc/sys/sctp_peeloff.2 @@ -36,7 +36,7 @@ .Os .Sh NAME .Nm sctp_peeloff -.Nd detach an association from a one-to-many socket to its on fd +.Nd detach an association from a one-to-many socket to its own fd .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -58,7 +58,7 @@ upon success. .Sh ERRORS The .Fn sctp_peeloff -can return the following errors. +system call can return the following errors: .Bl -tag -width Er .It Bq Er ENOTCONN The -- cgit v1.1 From 2f8777a272e2636da3cab02155819ce00d3be75a Mon Sep 17 00:00:00 2001 From: fabient Date: Tue, 19 Jan 2010 20:55:57 +0000 Subject: MFC 202157: Bug fix: add a missing initializer. --- lib/libpmc/libpmc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index ecc697d..af716da 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -2507,6 +2507,7 @@ pmc_init(void) break; case PMC_CPU_INTEL_CORE: PMC_MDEP_INIT(core); + pmc_class_table[n] = &core_class_table_descr; break; case PMC_CPU_INTEL_CORE2: case PMC_CPU_INTEL_CORE2EXTREME: -- cgit v1.1 From 611984fd9c7f0d6f95aeb5cacf35228e2db05d9e Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 20 Jan 2010 00:52:24 +0000 Subject: MFC r200799: K&R -> ANSI prototype. --- lib/libc/stdio/vsscanf.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/vsscanf.c b/lib/libc/stdio/vsscanf.c index e5e9691..22b5d2b 100644 --- a/lib/libc/stdio/vsscanf.c +++ b/lib/libc/stdio/vsscanf.c @@ -45,20 +45,15 @@ eofread(void *, char *, int); /* ARGSUSED */ static int -eofread(cookie, buf, len) - void *cookie; - char *buf; - int len; +eofread(void *cookie, char *buf, int len) { return (0); } int -vsscanf(str, fmt, ap) - const char * __restrict str; - const char * __restrict fmt; - __va_list ap; +vsscanf(const char * __restrict str, const char * __restrict fmt, + __va_list ap) { FILE f; -- cgit v1.1 From b823cbbcbac890bc9443bc1ab5ad5df5a68ad952 Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 20 Jan 2010 00:53:03 +0000 Subject: MFC r200800: Use vsscanf instead of rolling our own. PR: bin/140530 Submitted by: Jeremy Huddleston --- lib/libc/stdio/sscanf.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/sscanf.c b/lib/libc/stdio/sscanf.c index 3c792e0..c793b86 100644 --- a/lib/libc/stdio/sscanf.c +++ b/lib/libc/stdio/sscanf.c @@ -41,37 +41,14 @@ __FBSDID("$FreeBSD$"); #include #include "local.h" -static int eofread(void *, char *, int); - -/* ARGSUSED */ -static int -eofread(cookie, buf, len) - void *cookie; - char *buf; - int len; -{ - - return (0); -} - int sscanf(const char * __restrict str, char const * __restrict fmt, ...) 
{ int ret; va_list ap; - FILE f; - f._file = -1; - f._flags = __SRD; - f._bf._base = f._p = (unsigned char *)str; - f._bf._size = f._r = strlen(str); - f._read = eofread; - f._ub._base = NULL; - f._lb._base = NULL; - f._orientation = 0; - memset(&f._mbstate, 0, sizeof(mbstate_t)); va_start(ap, fmt); - ret = __svfscanf(&f, fmt, ap); + ret = vsscanf(str, fmt, ap); va_end(ap); return (ret); } -- cgit v1.1 From 831edd40677c273e864cffd01740ac8286bd04ba Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 20 Jan 2010 00:53:44 +0000 Subject: MFC r200802: Use vsprintf instead of rolling our own. PR: bin/140496 Submitted by: Jeremy Huddleston --- lib/libc/stdio/sprintf.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/sprintf.c b/lib/libc/stdio/sprintf.c index aaaae55..b55bd6c 100644 --- a/lib/libc/stdio/sprintf.c +++ b/lib/libc/stdio/sprintf.c @@ -46,17 +46,9 @@ sprintf(char * __restrict str, char const * __restrict fmt, ...) { int ret; va_list ap; - FILE f; - f._file = -1; - f._flags = __SWR | __SSTR; - f._bf._base = f._p = (unsigned char *)str; - f._bf._size = f._w = INT_MAX; - f._orientation = 0; - memset(&f._mbstate, 0, sizeof(mbstate_t)); va_start(ap, fmt); - ret = __vfprintf(&f, fmt, ap); + ret = vsprintf(str, fmt, ap); va_end(ap); - *f._p = 0; return (ret); } -- cgit v1.1 From 7670355465ac38d5365cd3c0afe593b2f90d3f62 Mon Sep 17 00:00:00 2001 From: avg Date: Fri, 22 Jan 2010 09:27:31 +0000 Subject: MFC r202585: fix a comment typo --- lib/libstand/bzipfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libstand/bzipfs.c b/lib/libstand/bzipfs.c index 46a151b..1b2e9eb 100644 --- a/lib/libstand/bzipfs.c +++ b/lib/libstand/bzipfs.c @@ -279,7 +279,7 @@ bzf_rewind(struct open_file *f) /* * Since bzip2 does not have an equivalent inflateReset function a crude * one needs to be provided. The functions all called in such a way that - * at any time an error occurs a role back can be done (effectively making + * at any time an error occurs a roll back can be done (effectively making * this rewind 'atomic', either the reset occurs successfully or not at all, * with no 'undefined' state happening). */ -- cgit v1.1 From d13809bdba98ca6d5e15d5838c916191af7b09c5 Mon Sep 17 00:00:00 2001 From: brooks Date: Fri, 22 Jan 2010 19:51:34 +0000 Subject: MFC r201350: The devices that supported EVFILT_NETDEV kqueue filters were removed in r195175. Remove all definitions, documentation, and usage. The change of function signature for vlan_link_state() was not merged to maintain the ABI. --- lib/libc/sys/kqueue.2 | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/kqueue.2 b/lib/libc/sys/kqueue.2 index e899a1b..326632d 100644 --- a/lib/libc/sys/kqueue.2 +++ b/lib/libc/sys/kqueue.2 @@ -438,19 +438,6 @@ There is a system wide limit on the number of timers which is controlled by the .Va kern.kq_calloutmax sysctl. -.It Dv EVFILT_NETDEV -Takes a descriptor to a network interface as the identifier, and the events to watch for in -.Va fflags . -It returns, when one or more of the requested events occur on the descriptor. -The events to monitor are: -.Bl -tag -width XXNOTE_LINKDOWN -.It Dv NOTE_LINKUP -The link is up. -.It Dv NOTE_LINKDOWN -The link is down. -.It Dv NOTE_LINKINV -The link state is invalid. -.El .Pp On return, .Va fflags @@ -595,13 +582,6 @@ system and this manual page were written by .An Jonathan Lemon Aq jlemon@FreeBSD.org . 
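As a brief aside to the kqueue material above: with EVFILT_NETDEV gone, EVFILT_TIMER is a representative filter that remains, and it is easy to exercise. The sketch below (illustrative only, not from the patch) arms a hypothetical one-shot 500 ms timer and waits for it; the data field is interpreted in milliseconds by default, and each such timer counts against the kern.kq_calloutmax limit mentioned above.

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct kevent ev;
	int kq;

	if ((kq = kqueue()) == -1)
		exit(1);
	/* Arm a one-shot timer; data is in milliseconds by default. */
	EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 500, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
		exit(1);
	/* Block until the timer fires. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		printf("timer %lu fired\n", (unsigned long)ev.ident);
	return (0);
}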
.Sh BUGS The -.Dv EVFILT_NETDEV -filter is currently only implemented for devices that use the -.Xr miibus 4 -driver for LINKUP and LINKDOWN operations. -Therefore, it will not work with many non-ethernet devices. -.Pp -The .Fa timeout value is limited to 24 hours; longer timeouts will be silently reinterpreted as 24 hours. -- cgit v1.1 From 4e79f22935c2afb11c50e718e8e97ea35e256c7c Mon Sep 17 00:00:00 2001 From: delphij Date: Sat, 23 Jan 2010 00:43:44 +0000 Subject: MFC r201892: Add a set of manual pages for pthread[_attr]_[sg]etaffinity(3). Reviewed by: davidxu --- lib/libc/sys/cpuset.2 | 6 ++++-- lib/libc/sys/cpuset_getaffinity.2 | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/cpuset.2 b/lib/libc/sys/cpuset.2 index fb9a770..1cdff68 100644 --- a/lib/libc/sys/cpuset.2 +++ b/lib/libc/sys/cpuset.2 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 29, 2008 +.Dd January 8, 2010 .Dt CPUSET 2 .Os .Sh NAME @@ -216,7 +216,9 @@ for allocation. .Xr cpuset 1 , .Xr cpuset_getaffinity 2 , .Xr cpuset_setaffinity 2 , -.Xr CPU_SET 3 +.Xr CPU_SET 3 , +.Xr pthread_affinity_np 3 , +.Xr pthread_attr_affinity_np 3 .Sh HISTORY The .Nm diff --git a/lib/libc/sys/cpuset_getaffinity.2 b/lib/libc/sys/cpuset_getaffinity.2 index b065ac3..c8b272f 100644 --- a/lib/libc/sys/cpuset_getaffinity.2 +++ b/lib/libc/sys/cpuset_getaffinity.2 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 29, 2008 +.Dd January 8, 2010 .Dt CPUSET 2 .Os .Sh NAME @@ -147,7 +147,9 @@ operation. .Xr cpuset 2 , .Xr cpuset_getid 2 , .Xr cpuset_setid 2 , -.Xr CPU_SET 3 +.Xr CPU_SET 3 , +.Xr pthread_affinity_np 3 , +.Xr pthread_attr_affinity_np 3 .Sh HISTORY The .Nm -- cgit v1.1 From 9fefaa1da5ac95af3adaeb90fa4952d512222d64 Mon Sep 17 00:00:00 2001 From: kib Date: Sun, 24 Jan 2010 12:35:36 +0000 Subject: Merge scandir(3) interface update to stable/8. MFC r201512: Modernize scandir(3) and alphasort(3) interfaces according to the IEEE Std 1003.1-2008. MFC r201602: Move scandir(3) and alphasort(3) into XSI namespace. MFC r201604: Use thunks to adapt alphasort-like interface to the comparison function required by qsort() and qsort_r(). MFC r202556 (by ache): Use strcoll() in opendir() and alphasort(). Remove some comments. MFC r202572 (by ache): Revert to using strcmp() for opendir(). MFC r202677 (by ache): Style. MFC r202679 (by ache): Style: rename internal function to opendir_compar(). MFC r202691 (by ache): For alphasort(3) add reference to strcoll(3). MFC r202693 (by ache): Style: reword comment. --- lib/libc/gen/opendir.c | 11 ++++++++++- lib/libc/gen/scandir.3 | 13 +++++++------ lib/libc/gen/scandir.c | 35 ++++++++++++++++++++------------- 3 files changed, 39 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/opendir.c b/lib/libc/gen/opendir.c index 9625ec6..b312c89 100644 --- a/lib/libc/gen/opendir.c +++ b/lib/libc/gen/opendir.c @@ -92,6 +92,14 @@ __opendir2(const char *name, int flags) return __opendir_common(fd, name, flags); } +static int +opendir_compar(const void *p1, const void *p2) +{ + + return (strcmp((*(const struct dirent **)p1)->d_name, + (*(const struct dirent **)p2)->d_name)); +} + /* * Common routine for opendir(3), __opendir2(3) and fdopendir(3). */ @@ -240,7 +248,8 @@ __opendir_common(int fd, const char *name, int flags) /* * This sort must be stable.
*/ - mergesort(dpv, n, sizeof(*dpv), alphasort); + mergesort(dpv, n, sizeof(*dpv), + opendir_compar); dpv[n] = NULL; xp = NULL; diff --git a/lib/libc/gen/scandir.3 b/lib/libc/gen/scandir.3 index 42ccac2..e32b0d8 100644 --- a/lib/libc/gen/scandir.3 +++ b/lib/libc/gen/scandir.3 @@ -28,7 +28,7 @@ .\" @(#)scandir.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd June 4, 1993 +.Dd January 3, 2010 .Dt SCANDIR 3 .Os .Sh NAME @@ -38,12 +38,11 @@ .Sh LIBRARY .Lb libc .Sh SYNOPSIS -.In sys/types.h .In dirent.h .Ft int -.Fn scandir "const char *dirname" "struct dirent ***namelist" "int \\*(lp*select\\*(rp\\*(lpstruct dirent *\\*(rp" "int \\*(lp*compar\\*(rp\\*(lpconst void *, const void *\\*(rp" +.Fn scandir "const char *dirname" "struct dirent ***namelist" "int \\*(lp*select\\*(rp\\*(lpconst struct dirent *\\*(rp" "int \\*(lp*compar\\*(rp\\*(lpconst struct dirent **, const struct dirent **\\*(rp" .Ft int -.Fn alphasort "const void *d1" "const void *d2" +.Fn alphasort "const struct dirent **d1" "const struct dirent **d2" .Sh DESCRIPTION The .Fn scandir @@ -82,7 +81,8 @@ The function is a routine which can be used for the .Fa compar -argument to sort the array alphabetically. +argument to sort the array alphabetically using +.Xr strcoll 3 . .Pp The memory allocated for the array can be deallocated with .Xr free 3 , @@ -95,7 +95,8 @@ cannot allocate enough memory to hold all the data structures. .Xr directory 3 , .Xr malloc 3 , .Xr qsort 3 , -.Xr dir 5 +.Xr dir 5 , +.Xr strcoll 3 .Sh HISTORY The .Fn scandir diff --git a/lib/libc/gen/scandir.c b/lib/libc/gen/scandir.c index 1dae85d..93bc852 100644 --- a/lib/libc/gen/scandir.c +++ b/lib/libc/gen/scandir.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); #include #include "un-namespace.h" +static int alphasort_thunk(void *thunk, const void *p1, const void *p2); + /* * The DIRSIZ macro is the minimum record length which will hold the directory * entry. This requires the amount of space in struct dirent without the @@ -58,11 +60,9 @@ __FBSDID("$FreeBSD$"); (((dp)->d_namlen + 1 + 3) &~ 3)) int -scandir(dirname, namelist, select, dcomp) - const char *dirname; - struct dirent ***namelist; - int (*select)(struct dirent *); - int (*dcomp)(const void *, const void *); +scandir(const char *dirname, struct dirent ***namelist, + int (*select)(const struct dirent *), int (*dcomp)(const struct dirent **, + const struct dirent **)) { struct dirent *d, *p, **names = NULL; size_t nitems = 0; @@ -111,26 +111,35 @@ scandir(dirname, namelist, select, dcomp) } closedir(dirp); if (nitems && dcomp != NULL) - qsort(names, nitems, sizeof(struct dirent *), dcomp); + qsort_r(names, nitems, sizeof(struct dirent *), + &dcomp, alphasort_thunk); *namelist = names; - return(nitems); + return (nitems); fail: while (nitems > 0) free(names[--nitems]); free(names); closedir(dirp); - return -1; + return (-1); } /* * Alphabetic order comparison routine for those who want it. + * POSIX 2008 requires that alphasort() uses strcoll(). 
*/ int -alphasort(d1, d2) - const void *d1; - const void *d2; +alphasort(const struct dirent **d1, const struct dirent **d2) +{ + + return (strcoll((*d1)->d_name, (*d2)->d_name)); +} + +static int +alphasort_thunk(void *thunk, const void *p1, const void *p2) { - return(strcmp((*(struct dirent **)d1)->d_name, - (*(struct dirent **)d2)->d_name)); + int (*dc)(const struct dirent **, const struct dirent **); + + dc = *(int (**)(const struct dirent **, const struct dirent **))thunk; + return (dc((const struct dirent **)p1, (const struct dirent **)p2)); } -- cgit v1.1 From 30e602e5e05f10f1ec2803b6f491152d8fdf8597 Mon Sep 17 00:00:00 2001 From: ed Date: Sun, 24 Jan 2010 14:30:57 +0000 Subject: MFC r202500: Fix a regression that was introduced in r191882. I changed login_tty() to only work when the application is not a session leader yet. This works fine for applications in the base system, but it turns out various applications call this function after daemonizing, which means they already use their own session. If setsid() fails, just call tcsetsid() on the current session. tcsetsid() will already perform proper security checks. Reported by: Oliver Lehmann --- lib/libutil/login_tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libutil/login_tty.c b/lib/libutil/login_tty.c index a14e244..92dc87f 100644 --- a/lib/libutil/login_tty.c +++ b/lib/libutil/login_tty.c @@ -50,7 +50,7 @@ login_tty(int fd) s = setsid(); if (s == -1) - return (-1); + s = getsid(0); if (tcsetsid(fd, s) == -1) return (-1); (void) dup2(fd, 0); -- cgit v1.1 From ae4685a7b06726fc32aff3db278a4a5b1b2c0c1a Mon Sep 17 00:00:00 2001 From: das Date: Sun, 24 Jan 2010 20:15:59 +0000 Subject: MFC r197752: Better glibc compatibility for getline/getdelim: - Tolerate applications that pass a NULL pointer for the buffer and claim that the capacity of the buffer is nonzero. - If an application passes in a non-NULL buffer pointer and claims the buffer has zero capacity, we should free (well, realloc) it anyway. It could have been obtained from malloc(0), so failing to free it would be a small memory leak. --- lib/libc/stdio/getdelim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/getdelim.c b/lib/libc/stdio/getdelim.c index 7af154f..d7d5627 100644 --- a/lib/libc/stdio/getdelim.c +++ b/lib/libc/stdio/getdelim.c @@ -120,8 +120,8 @@ getdelim(char ** __restrict linep, size_t * __restrict linecapp, int delim, goto error; } - if (*linecapp == 0) - *linep = NULL; + if (*linep == NULL) + *linecapp = 0; if (fp->_r <= 0 && __srefill(fp)) { /* If fp is at EOF already, we just need space for the NUL. */ -- cgit v1.1 From 430383e8e29efa3f53ea43b953bfbbce6afd1334 Mon Sep 17 00:00:00 2001 From: emax Date: Tue, 26 Jan 2010 00:38:56 +0000 Subject: MFC SVN rev 198492 Fix typo in bluetooth.3 Do not use reserved C++ keyword "new" --- lib/libbluetooth/bluetooth.3 | 2 +- lib/libbluetooth/bluetooth.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libbluetooth/bluetooth.3 b/lib/libbluetooth/bluetooth.3 index c4eab90..40ea20c 100644 --- a/lib/libbluetooth/bluetooth.3 +++ b/lib/libbluetooth/bluetooth.3 @@ -272,7 +272,7 @@ otherwise 0. .Pp The .Fn bt_devinfo -function populates prodivded +function populates provided .Vt bt_devinfo structure with the information about given Bluetooth device. 
The caller is expected to pass Bluetooth device name in the diff --git a/lib/libbluetooth/bluetooth.h b/lib/libbluetooth/bluetooth.h index 5ad3c3c..a73dbe9 100644 --- a/lib/libbluetooth/bluetooth.h +++ b/lib/libbluetooth/bluetooth.h @@ -163,8 +163,8 @@ int bt_devclose(int s); int bt_devsend (int s, uint16_t opcode, void *param, size_t plen); ssize_t bt_devrecv (int s, void *buf, size_t size, time_t to); int bt_devreq (int s, struct bt_devreq *r, time_t to); -int bt_devfilter(int s, struct bt_devfilter const *new, - struct bt_devfilter *old); +int bt_devfilter(int s, struct bt_devfilter const *newp, + struct bt_devfilter *oldp); void bt_devfilter_pkt_set(struct bt_devfilter *filter, uint8_t type); void bt_devfilter_pkt_clr(struct bt_devfilter *filter, uint8_t type); int bt_devfilter_pkt_tst(struct bt_devfilter const *filter, uint8_t type); -- cgit v1.1 From 55f26a2e9c5ce78a54d723f154a436053f46cd3e Mon Sep 17 00:00:00 2001 From: des Date: Tue, 26 Jan 2010 14:15:12 +0000 Subject: MFH (r202613, r202623): HTTP digest authentication support. --- lib/libfetch/Makefile | 7 +- lib/libfetch/http.c | 882 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 821 insertions(+), 68 deletions(-) (limited to 'lib') diff --git a/lib/libfetch/Makefile b/lib/libfetch/Makefile index 3576db7..5091adf 100644 --- a/lib/libfetch/Makefile +++ b/lib/libfetch/Makefile @@ -16,8 +16,11 @@ CFLAGS+= -DINET6 .if ${MK_OPENSSL} != "no" CFLAGS+= -DWITH_SSL -DPADD= ${LIBSSL} ${LIBCRYPTO} -LDADD= -lssl -lcrypto +DPADD= ${LIBSSL} ${LIBCRYPTO} ${LIBMD} +LDADD= -lssl -lcrypto -lmd +.else +DPADD= ${LIBMD} +LDADD= -lmd .endif CFLAGS+= -DFTP_COMBINE_CWDS diff --git a/lib/libfetch/http.c b/lib/libfetch/http.c index dd98349..f44366e 100644 --- a/lib/libfetch/http.c +++ b/lib/libfetch/http.c @@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -343,7 +344,8 @@ typedef enum { hdr_last_modified, hdr_location, hdr_transfer_encoding, - hdr_www_authenticate + hdr_www_authenticate, + hdr_proxy_authenticate, } hdr_t; /* Names of interesting headers */ @@ -357,6 +359,7 @@ static struct { { hdr_location, "Location" }, { hdr_transfer_encoding, "Transfer-Encoding" }, { hdr_www_authenticate, "WWW-Authenticate" }, + { hdr_proxy_authenticate, "Proxy-Authenticate" }, { hdr_unknown, NULL }, }; @@ -446,21 +449,114 @@ http_match(const char *str, const char *hdr) return (hdr); } + /* - * Get the next header and return the appropriate symbolic code. + * Get the next header and return the appropriate symbolic code. We + * need to read one line ahead for checking for a continuation line + * belonging to the current header (continuation lines start with + * white space). + * + * We get called with a fresh line already in the conn buffer, either + * from the previous http_next_header() invocation, or, the first + * time, from a fetch_getln() performed by our caller. + * + * This stops when we encounter an empty line (we don't read beyond the header + * area). + * + * Note that the "headerbuf" is just a place to return the result. Its + * contents are not used for the next call. This means that no cleanup + * is needed when doing another connection, just call the cleanup when + * fully done to deallocate memory.
*/ -static hdr_t -http_next_header(conn_t *conn, const char **p) + +/* Limit the max number of continuation lines to some reasonable value */ +#define HTTP_MAX_CONT_LINES 10 + +/* Place into which to build a header from one or several lines */ +typedef struct { + char *buf; /* buffer */ + size_t bufsize; /* buffer size */ + size_t buflen; /* length of buffer contents */ +} http_headerbuf_t; + +static void +init_http_headerbuf(http_headerbuf_t *buf) { - int i; + buf->buf = NULL; + buf->bufsize = 0; + buf->buflen = 0; +} - if (fetch_getln(conn) == -1) - return (hdr_syserror); - while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1])) +static void +clean_http_headerbuf(http_headerbuf_t *buf) +{ + if (buf->buf) + free(buf->buf); + init_http_headerbuf(buf); +} + +/* Remove whitespace at the end of the buffer */ +static void +http_conn_trimright(conn_t *conn) +{ + while (conn->buflen && + isspace((unsigned char)conn->buf[conn->buflen - 1])) conn->buflen--; conn->buf[conn->buflen] = '\0'; +} + +static hdr_t +http_next_header(conn_t *conn, http_headerbuf_t *hbuf, const char **p) +{ + int i, len; + + /* + * Have to do the stripping here because of the first line. So + * it's done twice for the subsequent lines. No big deal + */ + http_conn_trimright(conn); if (conn->buflen == 0) return (hdr_end); + + /* Copy the line to the headerbuf */ + if (hbuf->bufsize < conn->buflen + 1) { + if ((hbuf->buf = realloc(hbuf->buf, conn->buflen + 1)) == NULL) + return (hdr_syserror); + hbuf->bufsize = conn->buflen + 1; + } + strcpy(hbuf->buf, conn->buf); + hbuf->buflen = conn->buflen; + + /* + * Fetch possible continuation lines. Stop at 1st non-continuation + * and leave it in the conn buffer + */ + for (i = 0; i < HTTP_MAX_CONT_LINES; i++) { + if (fetch_getln(conn) == -1) + return (hdr_syserror); + + /* + * Note: we carry on the idea from the previous version + * that a pure whitespace line is equivalent to an empty + * one (so it's not continuation and will be handled when + * we are called next) + */ + http_conn_trimright(conn); + if (conn->buf[0] != ' ' && conn->buf[0] != "\t"[0]) + break; + + /* Got a continuation line. Concatenate to previous */ + len = hbuf->buflen + conn->buflen; + if (hbuf->bufsize < len + 1) { + len *= 2; + if ((hbuf->buf = realloc(hbuf->buf, len + 1)) == NULL) + return (hdr_syserror); + hbuf->bufsize = len + 1; + } + strcpy(hbuf->buf + hbuf->buflen, conn->buf); + hbuf->buflen += conn->buflen; + } + /* * We could check for malformed headers but we don't really care. * A valid header starts with a token immediately followed by a @@ -468,11 +564,290 @@ http_next_header(conn_t *conn, const char **p) * characters except "()<>@,;:\\\"{}". */ for (i = 0; hdr_names[i].num != hdr_unknown; i++) - if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL) + if ((*p = http_match(hdr_names[i].name, hbuf->buf)) != NULL) return (hdr_names[i].num); + return (hdr_unknown); } +/************************** + * [Proxy-]Authenticate header parsing + */ + +/* + * Read doublequote-delimited string into output buffer obuf (allocated + * by caller, whose responsibility it is to ensure that it's big enough) + * cp points to the first char after the initial '"' + * Handles \ quoting + * Returns pointer to the first char after the terminating double quote, or + * NULL for error. 
+ */ +static const char * +http_parse_headerstring(const char *cp, char *obuf) +{ + for (;;) { + switch (*cp) { + case 0: /* Unterminated string */ + *obuf = 0; + return (NULL); + case '"': /* Ending quote */ + *obuf = 0; + return (++cp); + case '\\': + if (*++cp == 0) { + *obuf = 0; + return (NULL); + } + /* FALLTHROUGH */ + default: + *obuf++ = *cp++; + } + } +} + +/* HTTP auth challenge schemes */ +typedef enum {HTTPAS_UNKNOWN, HTTPAS_BASIC,HTTPAS_DIGEST} http_auth_schemes_t; + +/* Data holder for a Basic or Digest challenge. */ +typedef struct { + http_auth_schemes_t scheme; + char *realm; + char *qop; + char *nonce; + char *opaque; + char *algo; + int stale; + int nc; /* Nonce count */ +} http_auth_challenge_t; + +static void +init_http_auth_challenge(http_auth_challenge_t *b) +{ + b->scheme = HTTPAS_UNKNOWN; + b->realm = b->qop = b->nonce = b->opaque = b->algo = NULL; + b->stale = b->nc = 0; +} + +static void +clean_http_auth_challenge(http_auth_challenge_t *b) +{ + if (b->realm) + free(b->realm); + if (b->qop) + free(b->qop); + if (b->nonce) + free(b->nonce); + if (b->opaque) + free(b->opaque); + if (b->algo) + free(b->algo); + init_http_auth_challenge(b); +} + +/* Data holder for an array of challenges offered in an http response. */ +#define MAX_CHALLENGES 10 +typedef struct { + http_auth_challenge_t *challenges[MAX_CHALLENGES]; + int count; /* Number of parsed challenges in the array */ + int valid; /* We did parse an authenticate header */ +} http_auth_challenges_t; + +static void +init_http_auth_challenges(http_auth_challenges_t *cs) +{ + int i; + for (i = 0; i < MAX_CHALLENGES; i++) + cs->challenges[i] = NULL; + cs->count = cs->valid = 0; +} + +static void +clean_http_auth_challenges(http_auth_challenges_t *cs) +{ + int i; + /* We rely on non-zero pointers being allocated, not on the count */ + for (i = 0; i < MAX_CHALLENGES; i++) { + if (cs->challenges[i] != NULL) { + clean_http_auth_challenge(cs->challenges[i]); + free(cs->challenges[i]); + } + } + init_http_auth_challenges(cs); +} + +/* + * Enumeration for lexical elements. Separators will be returned as their own + * ascii value + */ +typedef enum {HTTPHL_WORD=256, HTTPHL_STRING=257, HTTPHL_END=258, + HTTPHL_ERROR = 259} http_header_lex_t; + +/* + * Determine what kind of token comes next and return possible value + * in buf, which is supposed to have been allocated big enough by + * caller. Advance input pointer and return element type. + */ +static int +http_header_lex(const char **cpp, char *buf) +{ + size_t l; + /* Eat initial whitespace */ + *cpp += strspn(*cpp, " \t"); + if (**cpp == 0) + return (HTTPHL_END); + + /* Separator ? */ + if (**cpp == ',' || **cpp == '=') + return (*((*cpp)++)); + + /* String ? */ + if (**cpp == '"') { + *cpp = http_parse_headerstring(++*cpp, buf); + if (*cpp == NULL) + return (HTTPHL_ERROR); + return (HTTPHL_STRING); + } + + /* Read other token, until separator or whitespace */ + l = strcspn(*cpp, " \t,="); + memcpy(buf, *cpp, l); + buf[l] = 0; + *cpp += l; + return (HTTPHL_WORD); +} + +/* + * Read challenges from http xxx-authenticate header and accumulate them + * in the challenges list structure. + * + * Headers with multiple challenges are specified by rfc2617, but + * servers (e.g., squid) often send them in separate headers instead, + * which in turn is forbidden by the http spec (multiple headers with + * the same name are only allowed for pure comma-separated lists, see + * rfc2616 sec 4.2).
+ * + * We support both approaches anyway + */ +static int +http_parse_authenticate(const char *cp, http_auth_challenges_t *cs) +{ + int ret = -1; + http_header_lex_t lex; + char *key = malloc(strlen(cp) + 1); + char *value = malloc(strlen(cp) + 1); + char *buf = malloc(strlen(cp) + 1); + + if (key == NULL || value == NULL || buf == NULL) { + fetch_syserr(); + goto out; + } + + /* In any case we've seen the header and we set the valid bit */ + cs->valid = 1; + + /* Need word first */ + lex = http_header_lex(&cp, key); + if (lex != HTTPHL_WORD) + goto out; + + /* Loop on challenges */ + for (; cs->count < MAX_CHALLENGES; cs->count++) { + cs->challenges[cs->count] = + malloc(sizeof(http_auth_challenge_t)); + if (cs->challenges[cs->count] == NULL) { + fetch_syserr(); + goto out; + } + init_http_auth_challenge(cs->challenges[cs->count]); + if (!strcasecmp(key, "basic")) { + cs->challenges[cs->count]->scheme = HTTPAS_BASIC; + } else if (!strcasecmp(key, "digest")) { + cs->challenges[cs->count]->scheme = HTTPAS_DIGEST; + } else { + cs->challenges[cs->count]->scheme = HTTPAS_UNKNOWN; + /* + * Continue parsing as basic or digest may + * follow, and the syntax is the same for + * all. We'll just ignore this one when + * looking at the list + */ + } + + /* Loop on attributes */ + for (;;) { + /* Key */ + lex = http_header_lex(&cp, key); + if (lex != HTTPHL_WORD) + goto out; + + /* Equal sign */ + lex = http_header_lex(&cp, buf); + if (lex != '=') + goto out; + + /* Value */ + lex = http_header_lex(&cp, value); + if (lex != HTTPHL_WORD && lex != HTTPHL_STRING) + goto out; + + if (!strcasecmp(key, "realm")) + cs->challenges[cs->count]->realm = + strdup(value); + else if (!strcasecmp(key, "qop")) + cs->challenges[cs->count]->qop = + strdup(value); + else if (!strcasecmp(key, "nonce")) + cs->challenges[cs->count]->nonce = + strdup(value); + else if (!strcasecmp(key, "opaque")) + cs->challenges[cs->count]->opaque = + strdup(value); + else if (!strcasecmp(key, "algorithm")) + cs->challenges[cs->count]->algo = + strdup(value); + else if (!strcasecmp(key, "stale")) + cs->challenges[cs->count]->stale = + strcasecmp(value, "no"); + /* Else ignore unknown attributes */ + + /* Comma or Next challenge or End */ + lex = http_header_lex(&cp, key); + /* + * If we get a word here, this is the beginning of the + * next challenge. Break the attributes loop + */ + if (lex == HTTPHL_WORD) + break; + + if (lex == HTTPHL_END) { + /* End while looking for ',' is normal exit */ + cs->count++; + ret = 0; + goto out; + } + /* Anything else is an error */ + if (lex != ',') + goto out; + + } /* End attributes loop */ + } /* End challenge loop */ + + /* + * Challenges max count exceeded. This really can't happen + * with normal data, something's fishy -> error + */ + +out: + if (key) + free(key); + if (value) + free(value); + if (buf) + free(buf); + return (ret); +} + + /* * Parse a last-modified header */ @@ -618,6 +993,291 @@ http_base64(const char *src) return (str); } + +/* + * Extract authorization parameters from environment value. 
+ * The value is like scheme:realm:user:pass + */ +typedef struct { + char *scheme; + char *realm; + char *user; + char *password; +} http_auth_params_t; + +static void +init_http_auth_params(http_auth_params_t *s) +{ + s->scheme = s->realm = s->user = s->password = 0; +} + +static void +clean_http_auth_params(http_auth_params_t *s) +{ + if (s->scheme) + free(s->scheme); + if (s->realm) + free(s->realm); + if (s->user) + free(s->user); + if (s->password) + free(s->password); + init_http_auth_params(s); +} + +static int +http_authfromenv(const char *p, http_auth_params_t *parms) +{ + int ret = -1; + char *v, *ve; + char *str = strdup(p); + + if (str == NULL) { + fetch_syserr(); + return (-1); + } + v = str; + + if ((ve = strchr(v, ':')) == NULL) + goto out; + + *ve = 0; + if ((parms->scheme = strdup(v)) == NULL) { + fetch_syserr(); + goto out; + } + v = ve + 1; + + if ((ve = strchr(v, ':')) == NULL) + goto out; + + *ve = 0; + if ((parms->realm = strdup(v)) == NULL) { + fetch_syserr(); + goto out; + } + v = ve + 1; + + if ((ve = strchr(v, ':')) == NULL) + goto out; + + *ve = 0; + if ((parms->user = strdup(v)) == NULL) { + fetch_syserr(); + goto out; + } + v = ve + 1; + + + if ((parms->password = strdup(v)) == NULL) { + fetch_syserr(); + goto out; + } + ret = 0; +out: + if (ret == -1) + clean_http_auth_params(parms); + if (str) + free(str); + return (ret); +} + + +/* + * Digest response: the code to compute the digest is taken from the + * sample implementation in RFC2616 + */ +#define IN +#define OUT + +#define HASHLEN 16 +typedef char HASH[HASHLEN]; +#define HASHHEXLEN 32 +typedef char HASHHEX[HASHHEXLEN+1]; + +static const char *hexchars = "0123456789abcdef"; +static void +CvtHex(IN HASH Bin, OUT HASHHEX Hex) +{ + unsigned short i; + unsigned char j; + + for (i = 0; i < HASHLEN; i++) { + j = (Bin[i] >> 4) & 0xf; + Hex[i*2] = hexchars[j]; + j = Bin[i] & 0xf; + Hex[i*2+1] = hexchars[j]; + }; + Hex[HASHHEXLEN] = '\0'; +}; + +/* calculate H(A1) as per spec */ +static void +DigestCalcHA1( + IN char * pszAlg, + IN char * pszUserName, + IN char * pszRealm, + IN char * pszPassword, + IN char * pszNonce, + IN char * pszCNonce, + OUT HASHHEX SessionKey + ) +{ + MD5_CTX Md5Ctx; + HASH HA1; + + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, pszUserName, strlen(pszUserName)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszRealm, strlen(pszRealm)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszPassword, strlen(pszPassword)); + MD5Final(HA1, &Md5Ctx); + if (strcasecmp(pszAlg, "md5-sess") == 0) { + + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, HA1, HASHLEN); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce)); + MD5Final(HA1, &Md5Ctx); + }; + CvtHex(HA1, SessionKey); +} + +/* calculate request-digest/response-digest as per HTTP Digest spec */ +static void +DigestCalcResponse( + IN HASHHEX HA1, /* H(A1) */ + IN char * pszNonce, /* nonce from server */ + IN char * pszNonceCount, /* 8 hex digits */ + IN char * pszCNonce, /* client nonce */ + IN char * pszQop, /* qop-value: "", "auth", "auth-int" */ + IN char * pszMethod, /* method from the request */ + IN char * pszDigestUri, /* requested URL */ + IN HASHHEX HEntity, /* H(entity body) if qop="auth-int" */ + OUT HASHHEX Response /* request-digest or response-digest */ + ) +{ +/* DEBUG(fprintf(stderr, + "Calc: HA1[%s] Nonce[%s] qop[%s] method[%s] URI[%s]\n", + HA1, pszNonce, pszQop, pszMethod, pszDigestUri));*/ + MD5_CTX Md5Ctx; + HASH HA2; + HASH 
RespHash; + HASHHEX HA2Hex; + + // calculate H(A2) + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, pszMethod, strlen(pszMethod)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszDigestUri, strlen(pszDigestUri)); + if (strcasecmp(pszQop, "auth-int") == 0) { + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, HEntity, HASHHEXLEN); + }; + MD5Final(HA2, &Md5Ctx); + CvtHex(HA2, HA2Hex); + + // calculate response + MD5Init(&Md5Ctx); + MD5Update(&Md5Ctx, HA1, HASHHEXLEN); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce)); + MD5Update(&Md5Ctx, ":", 1); + if (*pszQop) { + MD5Update(&Md5Ctx, pszNonceCount, strlen(pszNonceCount)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce)); + MD5Update(&Md5Ctx, ":", 1); + MD5Update(&Md5Ctx, pszQop, strlen(pszQop)); + MD5Update(&Md5Ctx, ":", 1); + }; + MD5Update(&Md5Ctx, HA2Hex, HASHHEXLEN); + MD5Final(RespHash, &Md5Ctx); + CvtHex(RespHash, Response); +} + +/* + * Generate/Send a Digest authorization header + * This looks like: [Proxy-]Authorization: credentials + * + * credentials = "Digest" digest-response + * digest-response = 1#( username | realm | nonce | digest-uri + * | response | [ algorithm ] | [cnonce] | + * [opaque] | [message-qop] | + * [nonce-count] | [auth-param] ) + * username = "username" "=" username-value + * username-value = quoted-string + * digest-uri = "uri" "=" digest-uri-value + * digest-uri-value = request-uri ; As specified by HTTP/1.1 + * message-qop = "qop" "=" qop-value + * cnonce = "cnonce" "=" cnonce-value + * cnonce-value = nonce-value + * nonce-count = "nc" "=" nc-value + * nc-value = 8LHEX + * response = "response" "=" request-digest + * request-digest = <"> 32LHEX <"> + */ +static int +http_digest_auth(conn_t *conn, const char *hdr, http_auth_challenge_t *c, + http_auth_params_t *parms, struct url *url) +{ + int r; + char noncecount[10]; + char cnonce[40]; + char *options = 0; + + if (!c->realm || !c->nonce) { + DEBUG(fprintf(stderr, "realm/nonce not set in challenge\n")); + return(-1); + } + if (!c->algo) + c->algo = strdup(""); + + if (asprintf(&options, "%s%s%s%s", + *c->algo? ",algorithm=" : "", c->algo, + c->opaque? ",opaque=" : "", c->opaque?c->opaque:"")== -1) + return (-1); + + if (!c->qop) { + c->qop = strdup(""); + *noncecount = 0; + *cnonce = 0; + } else { + c->nc++; + sprintf(noncecount, "%08x", c->nc); + /* We don't try very hard with the cnonce ... 
*/ + sprintf(cnonce, "%x%lx", getpid(), (unsigned long)time(0)); + } + + HASHHEX HA1; + DigestCalcHA1(c->algo, parms->user, c->realm, + parms->password, c->nonce, cnonce, HA1); + DEBUG(fprintf(stderr, "HA1: [%s]\n", HA1)); + HASHHEX digest; + DigestCalcResponse(HA1, c->nonce, noncecount, cnonce, c->qop, + "GET", url->doc, "", digest); + + if (c->qop[0]) { + r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\"," + "nonce=\"%s\",uri=\"%s\",response=\"%s\"," + "qop=\"auth\", cnonce=\"%s\", nc=%s%s", + hdr, parms->user, c->realm, + c->nonce, url->doc, digest, + cnonce, noncecount, options); + } else { + r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\"," + "nonce=\"%s\",uri=\"%s\",response=\"%s\"%s", + hdr, parms->user, c->realm, + c->nonce, url->doc, digest, options); + } + if (options) + free(options); + return (r); +} + /* * Encode username and password */ @@ -627,8 +1287,8 @@ http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) char *upw, *auth; int r; - DEBUG(fprintf(stderr, "usr: [%s]\n", usr)); - DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd)); + DEBUG(fprintf(stderr, "basic: usr: [%s]\n", usr)); + DEBUG(fprintf(stderr, "basic: pwd: [%s]\n", pwd)); if (asprintf(&upw, "%s:%s", usr, pwd) == -1) return (-1); auth = http_base64(upw); @@ -641,33 +1301,49 @@ http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) } /* - * Send an authorization header + * Choose the challenge to answer and call the appropriate routine to + * produce the header. */ static int -http_authorize(conn_t *conn, const char *hdr, const char *p) +http_authorize(conn_t *conn, const char *hdr, http_auth_challenges_t *cs, + http_auth_params_t *parms, struct url *url) { - /* basic authorization */ - if (strncasecmp(p, "basic:", 6) == 0) { - char *user, *pwd, *str; - int r; - - /* skip realm */ - for (p += 6; *p && *p != ':'; ++p) - /* nothing */ ; - if (!*p || strchr(++p, ':') == NULL) - return (-1); - if ((str = strdup(p)) == NULL) - return (-1); /* XXX */ - user = str; - pwd = strchr(str, ':'); - *pwd++ = '\0'; - r = http_basic_auth(conn, hdr, user, pwd); - free(str); - return (r); + http_auth_challenge_t *basic = NULL; + http_auth_challenge_t *digest = NULL; + int i; + + /* If user or pass are null we're not happy */ + if (!parms->user || !parms->password) { + DEBUG(fprintf(stderr, "NULL usr or pass\n")); + return (-1); + } + + /* Look for a Digest and a Basic challenge */ + for (i = 0; i < cs->count; i++) { + if (cs->challenges[i]->scheme == HTTPAS_BASIC) + basic = cs->challenges[i]; + if (cs->challenges[i]->scheme == HTTPAS_DIGEST) + digest = cs->challenges[i]; } - return (-1); -} + /* Error if "Digest" was specified and there is no Digest challenge */ + if (!digest && (parms->scheme && + !strcasecmp(parms->scheme, "digest"))) { + DEBUG(fprintf(stderr, + "Digest auth in env, not supported by peer\n")); + return (-1); + } + /* + * If "basic" was specified in the environment, or there is no Digest + * challenge, do the basic thing. Don't need a challenge for this, + * so no need to check basic!=NULL + */ + if (!digest || (parms->scheme && !strcasecmp(parms->scheme,"basic"))) + return (http_basic_auth(conn,hdr,parms->user,parms->password)); + + /* Else, prefer digest.
We just checked that it's not NULL */ + return (http_digest_auth(conn, hdr, digest, parms, url)); +} /***************************************************************************** * Helper functions for connecting to a server or proxy @@ -797,13 +1473,13 @@ http_print_html(FILE *out, FILE *in) */ FILE * http_request(struct url *URL, const char *op, struct url_stat *us, - struct url *purl, const char *flags) + struct url *purl, const char *flags) { char timebuf[80]; char hbuf[MAXHOSTNAMELEN + 7], *host; conn_t *conn; struct url *url, *new; - int chunked, direct, ims, need_auth, noredirect, verbose; + int chunked, direct, ims, noredirect, verbose; int e, i, n, val; off_t offset, clength, length, size; time_t mtime; @@ -811,6 +1487,14 @@ http_request(struct url *URL, const char *op, struct url_stat *us, FILE *f; hdr_t h; struct tm *timestruct; + http_headerbuf_t headerbuf; + http_auth_challenges_t server_challenges; + http_auth_challenges_t proxy_challenges; + + /* The following calls don't allocate anything */ + init_http_headerbuf(&headerbuf); + init_http_auth_challenges(&server_challenges); + init_http_auth_challenges(&proxy_challenges); direct = CHECK_FLAG('d'); noredirect = CHECK_FLAG('A'); @@ -830,7 +1514,6 @@ http_request(struct url *URL, const char *op, struct url_stat *us, i = 0; e = HTTP_PROTOCOL_ERROR; - need_auth = 0; do { new = NULL; chunked = 0; @@ -895,27 +1578,67 @@ http_request(struct url *URL, const char *op, struct url_stat *us, /* virtual host */ http_cmd(conn, "Host: %s", host); - /* proxy authorization */ - if (purl) { - if (*purl->user || *purl->pwd) - http_basic_auth(conn, "Proxy-Authorization", - purl->user, purl->pwd); - else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') - http_authorize(conn, "Proxy-Authorization", p); + /* + * Proxy authorization: we only send auth after we received + * a 407 error. We do not first try basic anyway (changed + * when support was added for digest-auth) + */ + if (purl && proxy_challenges.valid) { + http_auth_params_t aparams; + init_http_auth_params(&aparams); + if (*purl->user || *purl->pwd) { + aparams.user = purl->user ? + strdup(purl->user) : strdup(""); + aparams.password = purl->pwd? + strdup(purl->pwd) : strdup(""); + } else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && + *p != '\0') { + if (http_authfromenv(p, &aparams) < 0) { + http_seterr(HTTP_NEED_PROXY_AUTH); + goto ouch; + } + } + http_authorize(conn, "Proxy-Authorization", + &proxy_challenges, &aparams, url); + clean_http_auth_params(&aparams); } - /* server authorization */ - if (need_auth || *url->user || *url->pwd) { - if (*url->user || *url->pwd) - http_basic_auth(conn, "Authorization", url->user, url->pwd); - else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') - http_authorize(conn, "Authorization", p); - else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { - http_basic_auth(conn, "Authorization", url->user, url->pwd); + /* + * Server authorization: we never send "a priori" + * Basic auth, which used to be done if user/pass were + * set in the url. This would be weird because we'd send the + * password in the clear even if Digest is finally to be + * used (it would have made more sense for the + * pre-digest version to do this when Basic was specified + * in the environment) + */ + if (server_challenges.valid) { + http_auth_params_t aparams; + init_http_auth_params(&aparams); + if (*url->user || *url->pwd) { + aparams.user = url->user ? + strdup(url->user) : strdup(""); + aparams.password = url->pwd ? 
+ strdup(url->pwd) : strdup(""); + } else if ((p = getenv("HTTP_AUTH")) != NULL && + *p != '\0') { + if (http_authfromenv(p, &aparams) < 0) { + http_seterr(HTTP_NEED_AUTH); + goto ouch; + } + } else if (fetchAuthMethod && + fetchAuthMethod(url) == 0) { + aparams.user = url->user ? + strdup(url->user) : strdup(""); + aparams.password = url->pwd ? + strdup(url->pwd) : strdup(""); } else { http_seterr(HTTP_NEED_AUTH); goto ouch; } + http_authorize(conn, "Authorization", + &server_challenges, &aparams, url); + clean_http_auth_params(&aparams); } /* other headers */ @@ -965,7 +1688,7 @@ http_request(struct url *URL, const char *op, struct url_stat *us, */ break; case HTTP_NEED_AUTH: - if (need_auth) { + if (server_challenges.valid) { /* * We already sent out authorization code, * so there's nothing more we can do. @@ -978,13 +1701,18 @@ http_request(struct url *URL, const char *op, struct url_stat *us, fetch_info("server requires authorization"); break; case HTTP_NEED_PROXY_AUTH: - /* - * If we're talking to a proxy, we already sent - * our proxy authorization code, so there's - * nothing more we can do. - */ - http_seterr(conn->err); - goto ouch; + if (proxy_challenges.valid) { + /* + * We already sent our proxy + * authorization code, so there's + * nothing more we can do. */ + http_seterr(conn->err); + goto ouch; + } + /* try again, but send the password this time */ + if (verbose) + fetch_info("proxy requires authorization"); + break; case HTTP_BAD_RANGE: /* * This can happen if we ask for 0 bytes because @@ -1004,9 +1732,13 @@ http_request(struct url *URL, const char *op, struct url_stat *us, /* fall through so we can get the full error message */ } - /* get headers */ + /* get headers. http_next_header expects one line readahead */ + if (fetch_getln(conn) == -1) { + fetch_syserr(); + goto ouch; + } do { - switch ((h = http_next_header(conn, &p))) { + switch ((h = http_next_header(conn, &headerbuf, &p))) { case hdr_syserror: fetch_syserr(); goto ouch; @@ -1054,7 +1786,12 @@ http_request(struct url *URL, const char *op, struct url_stat *us, case hdr_www_authenticate: if (conn->err != HTTP_NEED_AUTH) break; - /* if we were smarter, we'd check the method and realm */ + http_parse_authenticate(p, &server_challenges); + break; + case hdr_proxy_authenticate: + if (conn->err != HTTP_NEED_PROXY_AUTH) + break; + http_parse_authenticate(p, &proxy_challenges); break; case hdr_end: /* fall through */ @@ -1065,9 +1802,17 @@ http_request(struct url *URL, const char *op, struct url_stat *us, } while (h > hdr_end); /* we need to provide authentication */ - if (conn->err == HTTP_NEED_AUTH) { + if (conn->err == HTTP_NEED_AUTH || + conn->err == HTTP_NEED_PROXY_AUTH) { e = conn->err; - need_auth = 1; + if ((conn->err == HTTP_NEED_AUTH && + !server_challenges.valid) || + (conn->err == HTTP_NEED_PROXY_AUTH && + !proxy_challenges.valid)) { + /* 401/7 but no www/proxy-authenticate ?? 
*/ + DEBUG(fprintf(stderr, "401/7 and no auth header\n")); + goto ouch; + } fetch_close(conn); conn = NULL; continue; @@ -1096,7 +1841,7 @@ http_request(struct url *URL, const char *op, struct url_stat *us, /* all other cases: we got a redirect */ e = conn->err; - need_auth = 0; + clean_http_auth_challenges(&server_challenges); fetch_close(conn); conn = NULL; if (!new) { @@ -1172,7 +1917,9 @@ http_request(struct url *URL, const char *op, struct url_stat *us, fclose(f); f = NULL; } - + clean_http_headerbuf(&headerbuf); + clean_http_auth_challenges(&server_challenges); + clean_http_auth_challenges(&proxy_challenges); return (f); ouch: @@ -1182,6 +1929,9 @@ ouch: fetchFreeURL(purl); if (conn != NULL) fetch_close(conn); + clean_http_headerbuf(&headerbuf); + clean_http_auth_challenges(&server_challenges); + clean_http_auth_challenges(&proxy_challenges); return (NULL); } -- cgit v1.1 From a1b044b56513afcbb4f8cad16d16aa448177373c Mon Sep 17 00:00:00 2001 From: des Date: Tue, 26 Jan 2010 15:07:47 +0000 Subject: insta-mfh r203028 (doc update) --- lib/libfetch/fetch.3 | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/libfetch/fetch.3 b/lib/libfetch/fetch.3 index 9b312b0..aaf1a8d 100644 --- a/lib/libfetch/fetch.3 +++ b/lib/libfetch/fetch.3 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 14, 2008 +.Dd January 26, 2010 .Dt FETCH 3 .Os .Sh NAME @@ -509,9 +509,13 @@ Specifies HTTP authorization parameters as a colon-separated list of items. The first and second item are the authorization scheme and realm respectively; further items are scheme-dependent. -Currently, only basic authorization is supported. +Currently, the +.Dq basic +and +.Dq digest +authorization methods are supported. .Pp -Basic authorization requires two parameters: the user name and +Both methods require two parameters: the user name and password, in that order. .Pp This variable is only used if the server requires authorization and @@ -656,12 +660,14 @@ The .Nm fetch library was mostly written by .An Dag-Erling Sm\(/orgrav Aq des@FreeBSD.org -with numerous suggestions from +with numerous suggestions and contributions from .An Jordan K. Hubbard Aq jkh@FreeBSD.org , -.An Eugene Skepner Aq eu@qub.com -and other -.Fx -developers. +.An Eugene Skepner Aq eu@qub.com , +.An Hajimu Umemoto Aq ume@FreeBSD.org , +.An Henry Whincup Aq henry@techiebod.com , +.An Jukka A. Ukkonen Aq jau@iki.fi , +.An Jean-Fran\(,cois Dockes Aq jf@dockes.org +and others. It replaces the older .Nm ftpio library written by -- cgit v1.1 From ade3e629421f12a6799785d918a77632dfae157a Mon Sep 17 00:00:00 2001 From: antoine Date: Sat, 30 Jan 2010 12:11:21 +0000 Subject: MFC r201145 to stable/8: (S)LIST_HEAD_INITIALIZER takes a (S)LIST_HEAD as an argument. Fix some wrong usages. Note: this does not affect generated binaries as this argument is not used. 
PR: 137213 Submitted by: Eygene Ryabinkin (initial version) --- lib/libc/gen/sem.c | 2 +- lib/libgssapi/gss_mech_switch.c | 2 +- lib/librpcsec_gss/rpcsec_gss_conf.c | 4 ++-- lib/librpcsec_gss/svc_rpcsec_gss.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/sem.c b/lib/libc/gen/sem.c index 439f0fe..cd31d31 100644 --- a/lib/libc/gen/sem.c +++ b/lib/libc/gen/sem.c @@ -73,7 +73,7 @@ static sem_t sem_alloc(unsigned int value, semid_t semid, int system_sem); static void sem_free(sem_t sem); -static LIST_HEAD(, sem) named_sems = LIST_HEAD_INITIALIZER(&named_sems); +static LIST_HEAD(, sem) named_sems = LIST_HEAD_INITIALIZER(named_sems); static pthread_mutex_t named_sems_mtx = PTHREAD_MUTEX_INITIALIZER; __weak_reference(__sem_init, sem_init); diff --git a/lib/libgssapi/gss_mech_switch.c b/lib/libgssapi/gss_mech_switch.c index feb88f1..d07db88 100644 --- a/lib/libgssapi/gss_mech_switch.c +++ b/lib/libgssapi/gss_mech_switch.c @@ -42,7 +42,7 @@ #endif struct _gss_mech_switch_list _gss_mechs = - SLIST_HEAD_INITIALIZER(&_gss_mechs); + SLIST_HEAD_INITIALIZER(_gss_mechs); gss_OID_set _gss_mech_oids; /* diff --git a/lib/librpcsec_gss/rpcsec_gss_conf.c b/lib/librpcsec_gss/rpcsec_gss_conf.c index 4a0349b..14e063c 100644 --- a/lib/librpcsec_gss/rpcsec_gss_conf.c +++ b/lib/librpcsec_gss/rpcsec_gss_conf.c @@ -55,7 +55,7 @@ struct mech_info { }; SLIST_HEAD(mech_info_list, mech_info); -static struct mech_info_list mechs = SLIST_HEAD_INITIALIZER(&mechs); +static struct mech_info_list mechs = SLIST_HEAD_INITIALIZER(mechs); static const char **mech_names; struct qop_info { @@ -66,7 +66,7 @@ struct qop_info { }; SLIST_HEAD(qop_info_list, qop_info); -static struct qop_info_list qops = SLIST_HEAD_INITIALIZER(&qops); +static struct qop_info_list qops = SLIST_HEAD_INITIALIZER(qops); static int _rpc_gss_string_to_oid(const char* s, gss_OID oid) diff --git a/lib/librpcsec_gss/svc_rpcsec_gss.c b/lib/librpcsec_gss/svc_rpcsec_gss.c index 276126b..d0599be 100644 --- a/lib/librpcsec_gss/svc_rpcsec_gss.c +++ b/lib/librpcsec_gss/svc_rpcsec_gss.c @@ -90,7 +90,7 @@ struct svc_rpc_gss_callback { rpc_gss_callback_t cb_callback; }; static SLIST_HEAD(svc_rpc_gss_callback_list, svc_rpc_gss_callback) - svc_rpc_gss_callbacks = SLIST_HEAD_INITIALIZER(&svc_rpc_gss_callbacks); + svc_rpc_gss_callbacks = SLIST_HEAD_INITIALIZER(svc_rpc_gss_callbacks); struct svc_rpc_gss_svc_name { SLIST_ENTRY(svc_rpc_gss_svc_name) sn_link; @@ -102,7 +102,7 @@ struct svc_rpc_gss_svc_name { u_int sn_version; }; static SLIST_HEAD(svc_rpc_gss_svc_name_list, svc_rpc_gss_svc_name) - svc_rpc_gss_svc_names = SLIST_HEAD_INITIALIZER(&svc_rpc_gss_svc_names); + svc_rpc_gss_svc_names = SLIST_HEAD_INITIALIZER(svc_rpc_gss_svc_names); enum svc_rpc_gss_client_state { CLIENT_NEW, /* still authenticating */ -- cgit v1.1 From 5860340b9e706a6aa77a83f830e2e748f9d518c4 Mon Sep 17 00:00:00 2001 From: kib Date: Sun, 31 Jan 2010 18:38:03 +0000 Subject: MFC r199827: Implement sighold, sigignore, sigpause, sigrelse, sigset functions. MFC r200881 (by cognet): Don't name parameters. 
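Before the patch body, a short usage sketch of the System V-style calls this merge adds (illustrative only; the handler and choice of SIGUSR1 are hypothetical): sigset() installs the handler, sighold() blocks the signal so a self-delivered SIGUSR1 stays pending, and sigrelse() unblocks it, at which point the handler runs.

#include <signal.h>
#include <unistd.h>

static void
on_usr1(int sig)
{
	(void)sig;
	/* write(2) is async-signal-safe */
	write(STDOUT_FILENO, "SIGUSR1 delivered\n", 18);
}

int
main(void)
{
	if (sigset(SIGUSR1, on_usr1) == SIG_ERR)
		return (1);
	sighold(SIGUSR1);		/* block: delivery is deferred */
	kill(getpid(), SIGUSR1);	/* signal is now pending */
	sigrelse(SIGUSR1);		/* unblock: handler runs here */
	return (0);
}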
--- lib/libc/compat-43/Makefile.inc | 5 ++ lib/libc/compat-43/Symbol.map | 8 ++ lib/libc/compat-43/sigcompat.c | 85 ++++++++++++++++++++- lib/libc/compat-43/sigpause.2 | 160 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 250 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/libc/compat-43/Makefile.inc b/lib/libc/compat-43/Makefile.inc index 836f0a6..8505ff2 100644 --- a/lib/libc/compat-43/Makefile.inc +++ b/lib/libc/compat-43/Makefile.inc @@ -13,6 +13,11 @@ MAN+= creat.2 killpg.2 sigpause.2 sigsetmask.2 sigvec.2 MAN+= gethostid.3 setruid.3 MLINKS+=gethostid.3 sethostid.3 +MLINKS+=sigpause.2 sighold.2 +MLINKS+=sigpause.2 sigignore.2 +MLINKS+=sigpause.2 sigrelse.2 +MLINKS+=sigpause.2 sigset.2 +MLINKS+=sigpause.2 xsi_sigpause.2 MLINKS+=setruid.3 setrgid.3 MLINKS+=sigsetmask.2 sigblock.2 diff --git a/lib/libc/compat-43/Symbol.map b/lib/libc/compat-43/Symbol.map index e859a31..bd49f99 100644 --- a/lib/libc/compat-43/Symbol.map +++ b/lib/libc/compat-43/Symbol.map @@ -17,6 +17,14 @@ FBSD_1.0 { sigvec; }; +FBSD_1.2 { + sighold; + sigignore; + sigrelse; + sigset; + xsi_sigpause; +}; + FBSDprivate_1.0 { __creat; _creat; diff --git a/lib/libc/compat-43/sigcompat.c b/lib/libc/compat-43/sigcompat.c index 6280183..c3ba30a 100644 --- a/lib/libc/compat-43/sigcompat.c +++ b/lib/libc/compat-43/sigcompat.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include +#include #include "un-namespace.h" #include "libc_private.h" @@ -97,8 +98,7 @@ sigblock(mask) } int -sigpause(mask) - int mask; +sigpause(int mask) { sigset_t set; @@ -106,3 +106,84 @@ sigpause(mask) set.__bits[0] = mask; return (_sigsuspend(&set)); } + +int +xsi_sigpause(int sig) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, sig); + return (_sigsuspend(&set)); +} + +int +sighold(int sig) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, sig); + return (_sigprocmask(SIG_BLOCK, &set, NULL)); +} + +int +sigignore(int sig) +{ + struct sigaction sa; + + bzero(&sa, sizeof(sa)); + sa.sa_handler = SIG_IGN; + return (_sigaction(sig, &sa, NULL)); +} + +int +sigrelse(int sig) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, sig); + return (_sigprocmask(SIG_UNBLOCK, &set, NULL)); +} + +void +(*sigset(int sig, void (*disp)(int)))(int) +{ + sigset_t set, pset; + struct sigaction sa, psa; + int error; + + sigemptyset(&set); + sigaddset(&set, sig); + error = _sigprocmask(SIG_BLOCK, NULL, &pset); + if (error == -1) + return (SIG_ERR); + if ((__sighandler_t *)disp == SIG_HOLD) { + error = _sigprocmask(SIG_BLOCK, &set, &pset); + if (error == -1) + return (SIG_ERR); + if (sigismember(&pset, sig)) + return (SIG_HOLD); + else { + error = _sigaction(sig, NULL, &psa); + if (error == -1) + return (SIG_ERR); + return (psa.sa_handler); + } + } else { + error = _sigprocmask(SIG_UNBLOCK, &set, &pset); + if (error == -1) + return (SIG_ERR); + } + + bzero(&sa, sizeof(sa)); + sa.sa_handler = disp; + error = _sigaction(sig, &sa, &psa); + if (error == -1) + return (SIG_ERR); + if (sigismember(&pset, sig)) + return (SIG_HOLD); + else + return (psa.sa_handler); +} diff --git a/lib/libc/compat-43/sigpause.2 b/lib/libc/compat-43/sigpause.2 index 39edb0b..bf3cf0b 100644 --- a/lib/libc/compat-43/sigpause.2 +++ b/lib/libc/compat-43/sigpause.2 @@ -28,21 +28,118 @@ .\" @(#)sigpause.2 8.1 (Berkeley) 6/2/93 .\" $FreeBSD$ .\" +.\" Part of the content of the man page was derived from +.\" The Open Group Base Specifications Issue 7 +.\" IEEE Std 1003.1-2008 +.\" .Dd June 2, 1993 .Dt SIGPAUSE 2 .Os .Sh NAME 
-.Nm sigpause -.Nd atomically release blocked signals and wait for interrupt +.Nm sighold , +.Nm sigignore , +.Nm sigpause , +.Nm sigrelse , +.Nm sigset +.Nd legacy interface for signal management .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In signal.h .Ft int +.Fn sighold "int sig" +.Ft int +.Fn sigignore "int sig" +.Ft int +.Fn xsi_sigpause "int sigmask" +.Ft int +.Fn sigrelse "int sig" +.Ft void (*)(int) +.Fn sigset "int" "void (*disp)(int)" +.Ft int .Fn sigpause "int sigmask" .Sh DESCRIPTION .Sy This interface is made obsolete by -.Xr sigsuspend 2 . +.Xr sigsuspend 2 +.Sy and +.Xr sigaction 2 . +.Pp +The +.Fn sigset +function modifies signal dispositions. +The +.Fa sig +argument specifies the signal, which may be any signal except +.Dv SIGKILL +and +.Dv SIGSTOP . +The +.Fa disp +argument specifies the signal's disposition, +which may be +.Dv SIG_DFL , +.Dv SIG_IGN , +or the address of a signal handler. +If +.Fn sigset +is used, and +.Fa disp +is the address of a signal handler, the +system adds +.Fa sig +to the signal mask of the calling process before executing the signal +handler; when the signal handler returns, the system restores the +signal mask of the calling process to its state prior to the delivery +of the signal. +In addition, if +.Fn sigset +is used, and +.Fa disp +is equal to +.Dv SIG_HOLD , +.Fa sig +is added to the signal +mask of the calling process and +.Fa sig 's +disposition remains unchanged. +If +.Fn sigset +is used, and +.Fa disp +is not equal to +.Dv SIG_HOLD , +.Fa sig +is removed from the signal mask of the calling process. +.Pp +The +.Fn sighold +function adds +.Fa sig +to the signal mask of the calling process. +.Pp +The +.Fn sigrelse +function removes +.Fa sig +from the signal mask of the calling process. +.Pp +The +.Fn sigignore +function sets the disposition of +.Fa sig +to +.Dv SIG_IGN . +.Pp +The +.Fn xsi_sigpause +function removes +.Fa sig +from the signal mask of the calling process and suspends the calling process +until a signal is received. +The +.Fn xsi_sigpause +function restores the signal mask of the process to its original state before +returning. .Pp The .Fn sigpause @@ -57,13 +154,47 @@ The argument is usually 0 to indicate that no signals are to be blocked. +.Sh RETURN VALUES The .Fn sigpause -function -always terminates by being interrupted, returning -1 with +and +.Fn xsi_sigpause +functions +always terminate by being interrupted, returning -1 with .Va errno set to -.Er EINTR +.Er EINTR . +.Pp +Upon successful completion, +.Fn sigset +returns +.Dv SIG_HOLD +if the signal had been blocked and the signal's previous disposition if +it had not been blocked. +Otherwise, +.Dv SIG_ERR +is returned and +.Va errno +set to indicate the error. +.Pp +For all other functions, upon successful completion, 0 is returned. +Otherwise, -1 is returned and +.Va errno +is set to indicate the error: +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa sig +argument +is not a valid signal number. +.It Bq Er EINVAL +For +.Fn sigset +and +.Fn sigignore +functions, an attempt was made to catch or ignore +.Dv SIGKILL +or +.Dv SIGSTOP . +.El .Sh SEE ALSO .Xr kill 2 , .Xr sigaction 2 , @@ -85,9 +216,26 @@ and was copied from there into the .Pq Tn XSI option of .St -p1003.1-2001 . +.Fx +implements it under the name +.Fn xsi_sigpause . +The +.Fn sighold , +.Fn sigignore , +.Fn sigrelse +and +.Fn sigset +functions are implemented for compatibility with +.Sy System V +and +.Sy XSI +interfaces. .Sh HISTORY The .Fn sigpause function appeared in .Bx 4.2 and has been deprecated.
+All other functions appeared in +.Fx 9.0 +and were deprecated before being implemented. -- cgit v1.1 From 9a944ce09eb54108162372ef2d8e6bf920413311 Mon Sep 17 00:00:00 2001 From: gavin Date: Tue, 2 Feb 2010 19:37:26 +0000 Subject: Merge r203025,r203026 from head: Correct the HISTORY section of these man pages to show when the function, not the "manual page example" was introduced. Approved by: ed (mentor, implicit) --- lib/librpcsec_gss/rpc_gss_get_error.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_get_mech_info.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_get_mechanisms.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_get_principal_name.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_get_versions.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_getcred.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_is_installed.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_max_data_length.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_mech_to_oid.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_oid_to_mech.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_qop_to_num.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_seccreate.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_set_callback.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_set_defaults.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_set_svc_name.3 | 4 ++-- lib/librpcsec_gss/rpc_gss_svc_max_data_length.3 | 4 ++-- lib/librpcsec_gss/rpcsec_gss.3 | 4 ++-- 17 files changed, 34 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/librpcsec_gss/rpc_gss_get_error.3 b/lib/librpcsec_gss/rpc_gss_get_error.3 index e108766..74d2fa0 100644 --- a/lib/librpcsec_gss/rpc_gss_get_error.3 +++ b/lib/librpcsec_gss/rpc_gss_get_error.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_GET_ERROR 3 .Os .Sh NAME @@ -50,7 +50,7 @@ A pointer to a structure where the error details will be returned .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_get_mech_info.3 b/lib/librpcsec_gss/rpc_gss_get_mech_info.3 index a7f7d6b..945b00b 100644 --- a/lib/librpcsec_gss/rpc_gss_get_mech_info.3 +++ b/lib/librpcsec_gss/rpc_gss_get_mech_info.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_GET_MECH_INFO 3 .Os .Sh NAME @@ -60,7 +60,7 @@ otherwise .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_get_mechanisms.3 b/lib/librpcsec_gss/rpc_gss_get_mechanisms.3 index 4b57ac6..e18a648 100644 --- a/lib/librpcsec_gss/rpc_gss_get_mechanisms.3 +++ b/lib/librpcsec_gss/rpc_gss_get_mechanisms.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_GET_MECHANISMS 3 .Os .Sh NAME @@ -47,7 +47,7 @@ terminated list of installed security mechanisms. .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_get_principal_name.3 b/lib/librpcsec_gss/rpc_gss_get_principal_name.3 index 6f57212..19297f6 100644 --- a/lib/librpcsec_gss/rpc_gss_get_principal_name.3 +++ b/lib/librpcsec_gss/rpc_gss_get_principal_name.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_GET_PRINCIPAL_NAME 3 .Os .Sh NAME @@ -74,7 +74,7 @@ otherwise .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . 
.Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_get_versions.3 b/lib/librpcsec_gss/rpc_gss_get_versions.3 index f824066..78ebe73 100644 --- a/lib/librpcsec_gss/rpc_gss_get_versions.3 +++ b/lib/librpcsec_gss/rpc_gss_get_versions.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_GET_VERSIONS 3 .Os .Sh NAME @@ -56,7 +56,7 @@ is set to the lowest supported protocol version .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_getcred.3 b/lib/librpcsec_gss/rpc_gss_getcred.3 index 2ede33e..844e61e 100644 --- a/lib/librpcsec_gss/rpc_gss_getcred.3 +++ b/lib/librpcsec_gss/rpc_gss_getcred.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_GETCRED 3 .Os .Sh NAME @@ -77,7 +77,7 @@ otherwise. .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_is_installed.3 b/lib/librpcsec_gss/rpc_gss_is_installed.3 index 2859ed2..a043c45 100644 --- a/lib/librpcsec_gss/rpc_gss_is_installed.3 +++ b/lib/librpcsec_gss/rpc_gss_is_installed.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_IS_INSTALLED 3 .Os .Sh NAME @@ -57,7 +57,7 @@ otherwise. .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_max_data_length.3 b/lib/librpcsec_gss/rpc_gss_max_data_length.3 index 8957119..a600192 100644 --- a/lib/librpcsec_gss/rpc_gss_max_data_length.3 +++ b/lib/librpcsec_gss/rpc_gss_max_data_length.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_MAX_DATA_LENGTH 3 .Os .Sh NAME @@ -56,7 +56,7 @@ The maximum message size that can be encoded .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_mech_to_oid.3 b/lib/librpcsec_gss/rpc_gss_mech_to_oid.3 index c1f9f2a..95d0cd7 100644 --- a/lib/librpcsec_gss/rpc_gss_mech_to_oid.3 +++ b/lib/librpcsec_gss/rpc_gss_mech_to_oid.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_MECH_TO_OID 3 .Os .Sh NAME @@ -60,7 +60,7 @@ is returned, otherwise .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_oid_to_mech.3 b/lib/librpcsec_gss/rpc_gss_oid_to_mech.3 index 6183f26..c91a2f5 100644 --- a/lib/librpcsec_gss/rpc_gss_oid_to_mech.3 +++ b/lib/librpcsec_gss/rpc_gss_oid_to_mech.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_OID_TO_MECH 3 .Os .Sh NAME @@ -60,7 +60,7 @@ is returned, otherwise .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_qop_to_num.3 b/lib/librpcsec_gss/rpc_gss_qop_to_num.3 index e58d553..38d2725 100644 --- a/lib/librpcsec_gss/rpc_gss_qop_to_num.3 +++ b/lib/librpcsec_gss/rpc_gss_qop_to_num.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_QOP_TO_NUM 3 .Os .Sh NAME @@ -62,7 +62,7 @@ is returned, otherwise .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 .
.Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_seccreate.3 b/lib/librpcsec_gss/rpc_gss_seccreate.3 index 0f7dfab..07204cb 100644 --- a/lib/librpcsec_gss/rpc_gss_seccreate.3 +++ b/lib/librpcsec_gss/rpc_gss_seccreate.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_SECCREATE 3 .Os .Sh NAME @@ -104,7 +104,7 @@ to this value. .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_set_callback.3 b/lib/librpcsec_gss/rpc_gss_set_callback.3 index 0ad861f..f701cf1 100644 --- a/lib/librpcsec_gss/rpc_gss_set_callback.3 +++ b/lib/librpcsec_gss/rpc_gss_set_callback.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_SET_CALLBACK 3 .Os .Sh NAME @@ -100,7 +100,7 @@ otherwise .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_set_defaults.3 b/lib/librpcsec_gss/rpc_gss_set_defaults.3 index 241cf9e..c4a24cf 100644 --- a/lib/librpcsec_gss/rpc_gss_set_defaults.3 +++ b/lib/librpcsec_gss/rpc_gss_set_defaults.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_SET_DEFAULTS 3 .Os .Sh NAME @@ -62,7 +62,7 @@ if the values were set .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_set_svc_name.3 b/lib/librpcsec_gss/rpc_gss_set_svc_name.3 index 9be8fdf..79a9c68 100644 --- a/lib/librpcsec_gss/rpc_gss_set_svc_name.3 +++ b/lib/librpcsec_gss/rpc_gss_set_svc_name.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_SET_SVC_NAME 3 .Os .Sh NAME @@ -79,7 +79,7 @@ otherwise. .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpc_gss_svc_max_data_length.3 b/lib/librpcsec_gss/rpc_gss_svc_max_data_length.3 index 2445f75..ef4871f 100644 --- a/lib/librpcsec_gss/rpc_gss_svc_max_data_length.3 +++ b/lib/librpcsec_gss/rpc_gss_svc_max_data_length.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_SVC_MAX_DATA_LENGTH 3 .Os .Sh NAME @@ -56,7 +56,7 @@ The maximum message size that can be encoded .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 8.0 . .Sh AUTHORS This diff --git a/lib/librpcsec_gss/rpcsec_gss.3 b/lib/librpcsec_gss/rpcsec_gss.3 index 793f012..45bd82f 100644 --- a/lib/librpcsec_gss/rpcsec_gss.3 +++ b/lib/librpcsec_gss/rpcsec_gss.3 @@ -24,7 +24,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd July 4, 2008 +.Dd January 26, 2010 .Dt RPC_GSS_SECCREATE 3 .Os .Sh NAME @@ -222,7 +222,7 @@ Calculate maximum server message sizes. .Sh HISTORY The .Nm -manual page example first appeared in +library first appeared in .Fx 8.0 . .Sh AUTHORS This -- cgit v1.1 From 9ce8997aab4144c0a5f5fb94bd7c3a1cb77bcbba Mon Sep 17 00:00:00 2001 From: gavin Date: Tue, 2 Feb 2010 19:44:52 +0000 Subject: Merge r203027 from head: Correct the HISTORY section of these man pages to show when the function, not the "manual page example" was introduced. 
Approved by: ed (mentor, implicit) --- lib/libgssapi/gss_accept_sec_context.3 | 4 ++-- lib/libgssapi/gss_acquire_cred.3 | 4 ++-- lib/libgssapi/gss_add_cred.3 | 4 ++-- lib/libgssapi/gss_add_oid_set_member.3 | 4 ++-- lib/libgssapi/gss_canonicalize_name.3 | 4 ++-- lib/libgssapi/gss_compare_name.3 | 4 ++-- lib/libgssapi/gss_context_time.3 | 4 ++-- lib/libgssapi/gss_create_empty_oid_set.3 | 4 ++-- lib/libgssapi/gss_delete_sec_context.3 | 4 ++-- lib/libgssapi/gss_display_name.3 | 4 ++-- lib/libgssapi/gss_display_status.3 | 4 ++-- lib/libgssapi/gss_duplicate_name.3 | 4 ++-- lib/libgssapi/gss_export_name.3 | 4 ++-- lib/libgssapi/gss_export_sec_context.3 | 4 ++-- lib/libgssapi/gss_get_mic.3 | 4 ++-- lib/libgssapi/gss_import_name.3 | 4 ++-- lib/libgssapi/gss_import_sec_context.3 | 4 ++-- lib/libgssapi/gss_indicate_mechs.3 | 4 ++-- lib/libgssapi/gss_init_sec_context.3 | 4 ++-- lib/libgssapi/gss_inquire_context.3 | 4 ++-- lib/libgssapi/gss_inquire_cred.3 | 4 ++-- lib/libgssapi/gss_inquire_cred_by_mech.3 | 4 ++-- lib/libgssapi/gss_inquire_mechs_for_name.3 | 4 ++-- lib/libgssapi/gss_inquire_names_for_mech.3 | 4 ++-- lib/libgssapi/gss_process_context_token.3 | 4 ++-- lib/libgssapi/gss_release_buffer.3 | 4 ++-- lib/libgssapi/gss_release_cred.3 | 4 ++-- lib/libgssapi/gss_release_name.3 | 4 ++-- lib/libgssapi/gss_release_oid_set.3 | 4 ++-- lib/libgssapi/gss_test_oid_set_member.3 | 4 ++-- lib/libgssapi/gss_unwrap.3 | 4 ++-- lib/libgssapi/gss_verify_mic.3 | 4 ++-- lib/libgssapi/gss_wrap.3 | 4 ++-- lib/libgssapi/gss_wrap_size_limit.3 | 4 ++-- lib/libgssapi/gssapi.3 | 4 ++-- lib/libgssapi/mech.5 | 4 ++-- 36 files changed, 72 insertions(+), 72 deletions(-) (limited to 'lib') diff --git a/lib/libgssapi/gss_accept_sec_context.3 b/lib/libgssapi/gss_accept_sec_context.3 index 6b57518..8957831 100644 --- a/lib/libgssapi/gss_accept_sec_context.3 +++ b/lib/libgssapi/gss_accept_sec_context.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_ACCEPT_SEC_CONTEXT 3 PRM .Sh NAME @@ -451,7 +451,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_acquire_cred.3 b/lib/libgssapi/gss_acquire_cred.3 index b43185c..c48a468 100644 --- a/lib/libgssapi/gss_acquire_cred.3 +++ b/lib/libgssapi/gss_acquire_cred.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_ACQUIRE_CRED 3 PRM .Sh NAME @@ -205,7 +205,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_add_cred.3 b/lib/libgssapi/gss_add_cred.3 index 72f2ab0..53df140 100644 --- a/lib/libgssapi/gss_add_cred.3 +++ b/lib/libgssapi/gss_add_cred.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_ADD_CRED 3 PRM .Sh NAME @@ -305,7 +305,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . 
.Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_add_oid_set_member.3 b/lib/libgssapi/gss_add_oid_set_member.3 index 255050b..fb2119c 100644 --- a/lib/libgssapi/gss_add_oid_set_member.3 +++ b/lib/libgssapi/gss_add_oid_set_member.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_ADD_OID_SET_MEMBER 3 PRM .Sh NAME @@ -97,7 +97,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_canonicalize_name.3 b/lib/libgssapi/gss_canonicalize_name.3 index b38ca93..86957dc 100644 --- a/lib/libgssapi/gss_canonicalize_name.3 +++ b/lib/libgssapi/gss_canonicalize_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_CANONICALIZE_NAME 3 PRM .Sh NAME @@ -104,7 +104,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_compare_name.3 b/lib/libgssapi/gss_compare_name.3 index 561c994..7f69d43 100644 --- a/lib/libgssapi/gss_compare_name.3 +++ b/lib/libgssapi/gss_compare_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_COMPARE_NAME PRM .Sh NAME @@ -89,7 +89,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_context_time.3 b/lib/libgssapi/gss_context_time.3 index 96f61a9..9fb43ea 100644 --- a/lib/libgssapi/gss_context_time.3 +++ b/lib/libgssapi/gss_context_time.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_CONTEXT_TIME 3 PRM .Sh NAME @@ -75,7 +75,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_create_empty_oid_set.3 b/lib/libgssapi/gss_create_empty_oid_set.3 index fd6cca6..08d82f6 100644 --- a/lib/libgssapi/gss_create_empty_oid_set.3 +++ b/lib/libgssapi/gss_create_empty_oid_set.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_CREATE_EMPTY_OID_SET 3 PRM .Sh NAME @@ -78,7 +78,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_delete_sec_context.3 b/lib/libgssapi/gss_delete_sec_context.3 index d24644a..a0c26df 100644 --- a/lib/libgssapi/gss_delete_sec_context.3 +++ b/lib/libgssapi/gss_delete_sec_context.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. 
-.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_DELETE_SEC_CONTEXT 3 PRM .Sh NAME @@ -130,7 +130,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_display_name.3 b/lib/libgssapi/gss_display_name.3 index 4f50870..2441eef 100644 --- a/lib/libgssapi/gss_display_name.3 +++ b/lib/libgssapi/gss_display_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_DISPLAY_NAME 3 PRM .Sh NAME @@ -118,7 +118,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_display_status.3 b/lib/libgssapi/gss_display_status.3 index 3b3c323..2b1affd 100644 --- a/lib/libgssapi/gss_display_status.3 +++ b/lib/libgssapi/gss_display_status.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_DISPLAY_STATUS 3 PRM .Sh NAME @@ -177,7 +177,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_duplicate_name.3 b/lib/libgssapi/gss_duplicate_name.3 index af885bf..334a126 100644 --- a/lib/libgssapi/gss_duplicate_name.3 +++ b/lib/libgssapi/gss_duplicate_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_DUPLICATE_NAME 3 PRM .Sh NAME @@ -90,7 +90,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_export_name.3 b/lib/libgssapi/gss_export_name.3 index c10f472..e1feee2 100644 --- a/lib/libgssapi/gss_export_name.3 +++ b/lib/libgssapi/gss_export_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_EXPORT_NAME 3 PRM .Sh NAME @@ -95,7 +95,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_export_sec_context.3 b/lib/libgssapi/gss_export_sec_context.3 index d8359c8..1f7f01d 100644 --- a/lib/libgssapi/gss_export_sec_context.3 +++ b/lib/libgssapi/gss_export_sec_context.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_EXPORT_SEC_CONTEXT 3 PRM .Sh NAME @@ -135,7 +135,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_get_mic.3 b/lib/libgssapi/gss_get_mic.3 index 8f435d6..8892808 100644 --- a/lib/libgssapi/gss_get_mic.3 +++ b/lib/libgssapi/gss_get_mic.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. 
-.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_GET_MIC 3 PRM .Sh NAME @@ -132,7 +132,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_import_name.3 b/lib/libgssapi/gss_import_name.3 index ed16278..3119f04 100644 --- a/lib/libgssapi/gss_import_name.3 +++ b/lib/libgssapi/gss_import_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_IMPORT_NAME 3 PRM .Sh NAME @@ -106,7 +106,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_import_sec_context.3 b/lib/libgssapi/gss_import_sec_context.3 index f4dda84..a889e2b 100644 --- a/lib/libgssapi/gss_import_sec_context.3 +++ b/lib/libgssapi/gss_import_sec_context.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_IMPORT_SEC_CONTEXT 3 PRM .Sh NAME @@ -87,7 +87,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_indicate_mechs.3 b/lib/libgssapi/gss_indicate_mechs.3 index 469ad1d..26ffaa6 100644 --- a/lib/libgssapi/gss_indicate_mechs.3 +++ b/lib/libgssapi/gss_indicate_mechs.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INDICATE_MECHS 3 PRM .Sh NAME @@ -74,7 +74,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_init_sec_context.3 b/lib/libgssapi/gss_init_sec_context.3 index 5c39de4..787aff3 100644 --- a/lib/libgssapi/gss_init_sec_context.3 +++ b/lib/libgssapi/gss_init_sec_context.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INIT_SEC_CONTEXT 3 PRM .Sh NAME @@ -538,7 +538,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_inquire_context.3 b/lib/libgssapi/gss_inquire_context.3 index 2ecd4d6..51fa18d 100644 --- a/lib/libgssapi/gss_inquire_context.3 +++ b/lib/libgssapi/gss_inquire_context.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INQUIRE_CONTEXT 3 PRM .Sh NAME @@ -251,7 +251,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_inquire_cred.3 b/lib/libgssapi/gss_inquire_cred.3 index 979f96e..99c20ce 100644 --- a/lib/libgssapi/gss_inquire_cred.3 +++ b/lib/libgssapi/gss_inquire_cred.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. 
-.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INQUIRE_CRED 3 PRM .Sh NAME @@ -125,7 +125,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_inquire_cred_by_mech.3 b/lib/libgssapi/gss_inquire_cred_by_mech.3 index 85b5ecc..22319bd 100644 --- a/lib/libgssapi/gss_inquire_cred_by_mech.3 +++ b/lib/libgssapi/gss_inquire_cred_by_mech.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INQUIRE_CRED_BY_MECH 3 PRM .Sh NAME @@ -139,7 +139,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_inquire_mechs_for_name.3 b/lib/libgssapi/gss_inquire_mechs_for_name.3 index 91d1c81..b56e663 100644 --- a/lib/libgssapi/gss_inquire_mechs_for_name.3 +++ b/lib/libgssapi/gss_inquire_mechs_for_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INQUIRE_MECHS_FOR_NAME 3 PRM .Sh NAME @@ -100,7 +100,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_inquire_names_for_mech.3 b/lib/libgssapi/gss_inquire_names_for_mech.3 index 64d8c9c..8b7f069 100644 --- a/lib/libgssapi/gss_inquire_names_for_mech.3 +++ b/lib/libgssapi/gss_inquire_names_for_mech.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_INQUIRE_NAMES_FOR_MECH 3 PRM .Sh NAME @@ -74,7 +74,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_process_context_token.3 b/lib/libgssapi/gss_process_context_token.3 index 884a556..85e1029 100644 --- a/lib/libgssapi/gss_process_context_token.3 +++ b/lib/libgssapi/gss_process_context_token.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_PROCESS_CONTEXT_TOKEN 3 PRM .Sh NAME @@ -103,7 +103,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_release_buffer.3 b/lib/libgssapi/gss_release_buffer.3 index 4cc449f..432d422 100644 --- a/lib/libgssapi/gss_release_buffer.3 +++ b/lib/libgssapi/gss_release_buffer.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_RELEASE_BUFFER 3 PRM .Sh NAME @@ -78,7 +78,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . 
.Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_release_cred.3 b/lib/libgssapi/gss_release_cred.3 index 2f9fca6..ca99fc4 100644 --- a/lib/libgssapi/gss_release_cred.3 +++ b/lib/libgssapi/gss_release_cred.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_RELEASE_CRED 3 PRM .Sh NAME @@ -75,7 +75,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_release_name.3 b/lib/libgssapi/gss_release_name.3 index a6d858c..7bf1181 100644 --- a/lib/libgssapi/gss_release_name.3 +++ b/lib/libgssapi/gss_release_name.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_RELEASE_NAME 3 PRM .Sh NAME @@ -71,7 +71,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_release_oid_set.3 b/lib/libgssapi/gss_release_oid_set.3 index 1e11f44..0142e2f 100644 --- a/lib/libgssapi/gss_release_oid_set.3 +++ b/lib/libgssapi/gss_release_oid_set.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_RELEASE_OID_SET 3 PRM .Sh NAME @@ -76,7 +76,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_test_oid_set_member.3 b/lib/libgssapi/gss_test_oid_set_member.3 index 38e5f3f..86e06b3 100644 --- a/lib/libgssapi/gss_test_oid_set_member.3 +++ b/lib/libgssapi/gss_test_oid_set_member.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_TEST_OID_SET_MEMBER 3 PRM .Sh NAME @@ -83,7 +83,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_unwrap.3 b/lib/libgssapi/gss_unwrap.3 index 770d9c9..4cf3c6d 100644 --- a/lib/libgssapi/gss_unwrap.3 +++ b/lib/libgssapi/gss_unwrap.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_UNWRAP 3 PRM .Sh NAME @@ -158,7 +158,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_verify_mic.3 b/lib/libgssapi/gss_verify_mic.3 index e482dbf..b72f468 100644 --- a/lib/libgssapi/gss_verify_mic.3 +++ b/lib/libgssapi/gss_verify_mic.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_VERIFY_MIC 3 PRM .Sh NAME @@ -139,7 +139,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . 
.Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_wrap.3 b/lib/libgssapi/gss_wrap.3 index a80cf46..c432b48 100644 --- a/lib/libgssapi/gss_wrap.3 +++ b/lib/libgssapi/gss_wrap.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_WRAP 3 PRM .Sh NAME @@ -146,7 +146,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gss_wrap_size_limit.3 b/lib/libgssapi/gss_wrap_size_limit.3 index a3e5a9d..c1d40da 100644 --- a/lib/libgssapi/gss_wrap_size_limit.3 +++ b/lib/libgssapi/gss_wrap_size_limit.3 @@ -27,7 +27,7 @@ .\" $FreeBSD$ .\" .\" The following commands are required for all man pages. -.Dd October 30, 2007 +.Dd January 26, 2010 .Os .Dt GSS_WRAP_SIZE_LIMIT 3 PRM .Sh NAME @@ -131,7 +131,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page example first appeared in +function first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/gssapi.3 b/lib/libgssapi/gssapi.3 index ce26e7e..2ed54a9 100644 --- a/lib/libgssapi/gssapi.3 +++ b/lib/libgssapi/gssapi.3 @@ -26,7 +26,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 30, 2005 +.Dd January 26, 2010 .Dt GSSAPI 3 .Os .Sh NAME @@ -229,7 +229,7 @@ Generic Security Service API Version 2 : C-bindings .Sh HISTORY The .Nm -manual page first appeared in +library first appeared in .Fx 7.0 . .Sh AUTHORS John Wray, Iris Associates diff --git a/lib/libgssapi/mech.5 b/lib/libgssapi/mech.5 index fd17e3a..6078828 100644 --- a/lib/libgssapi/mech.5 +++ b/lib/libgssapi/mech.5 @@ -23,7 +23,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd November 14, 2005 +.Dd January 26, 2010 .Dt MECH 5 .Os .Sh NAME @@ -93,7 +93,7 @@ GSS_KRB5_CONF_C_QOP_DES 0x0100 kerberosv5 .Sh HISTORY The .Nm -manual page example first appeared in +manual page first appeared in .Fx 7.0 . .Sh AUTHORS This -- cgit v1.1 From 29b12428b8ad7caf2654fa46475ed1587a474fb0 Mon Sep 17 00:00:00 2001 From: joerg Date: Thu, 4 Feb 2010 19:49:07 +0000 Subject: MFC r203356: add a manpage for gpib(3). --- lib/libgpib/Makefile | 19 ++ lib/libgpib/gpib.3 | 738 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 757 insertions(+) create mode 100644 lib/libgpib/gpib.3 (limited to 'lib') diff --git a/lib/libgpib/Makefile b/lib/libgpib/Makefile index 5465d8b..0ead93f 100644 --- a/lib/libgpib/Makefile +++ b/lib/libgpib/Makefile @@ -7,4 +7,23 @@ INCSDIR= ${INCLUDEDIR}/gpib SRCS= ibfoo.c WARNS?= 6 +MAN= gpib.3 + +# MLINKS are only provided for functions that are actually +# implemented; update this if missing pieces have been filled in. +MLINKS+= gpib.3 ibclr.3 +MLINKS+= gpib.3 ibdev.3 +MLINKS+= gpib.3 ibdma.3 +MLINKS+= gpib.3 ibeos.3 +MLINKS+= gpib.3 ibeot.3 +MLINKS+= gpib.3 ibloc.3 +MLINKS+= gpib.3 ibonl.3 +MLINKS+= gpib.3 ibpad.3 +MLINKS+= gpib.3 ibrd.3 +MLINKS+= gpib.3 ibsad.3 +MLINKS+= gpib.3 ibsic.3 +MLINKS+= gpib.3 ibtmo.3 +MLINKS+= gpib.3 ibtrg.3 +MLINKS+= gpib.3 ibwrt.3 + .include diff --git a/lib/libgpib/gpib.3 b/lib/libgpib/gpib.3 new file mode 100644 index 0000000..a57d3a4 --- /dev/null +++ b/lib/libgpib/gpib.3 @@ -0,0 +1,738 @@ +.\" Copyright (c) 2010, Joerg Wunsch +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd February 1, 2010 +.Dt GPIB 3 +.Os +.Sh NAME +.\" .Nm ibask , +.\" .Nm ibbna , +.\" .Nm ibcac , +.Nm ibclr , +.\" .Nm ibcmd , +.\" .Nm ibcmda , +.\" .Nm ibconfig , +.Nm ibdev , +.\" .Nm ibdiag , +.Nm ibdma , +.Nm ibeos , +.Nm ibeot , +.\" .Nm ibevent , +.\" .Nm ibfind , +.\" .Nm ibgts , +.\" .Nm ibist , +.\" .Nm iblines , +.\" .Nm ibllo , +.\" .Nm ibln , +.Nm ibloc , +.Nm ibonl , +.Nm ibpad , +.\" .Nm ibpct , +.\" .Nm ibpoke , +.\" .Nm ibppc , +.Nm ibrd , +.\" .Nm ibrda , +.\" .Nm ibrdf , +.\" .Nm ibrdkey , +.\" .Nm ibrpp , +.\" .Nm ibrsc , +.\" .Nm ibrsp , +.\" .Nm ibrsv , +.Nm ibsad , +.\" .Nm ibsgnl , +.Nm ibsic , +.\" .Nm ibsre , +.\" .Nm ibsrq , +.\" .Nm ibstop , +.Nm ibtmo , +.\" .Nm ibtrap , +.Nm ibtrg , +.\" .Nm ibwait , +.Nm ibwrt +.\" .Nm ibwrta , +.\" .Nm ibwrtf , +.\" .Nm ibwrtkey , +.\" .Nm ibxtrc +.Nd "GPIB library" +.Sh LIBRARY +.Lb libgpib +.Sh SYNOPSIS +.In gpib.h +.Pp +.Dv extern int ibcnt , +.Dv iberr , +.Dv ibsta ; +.Pp +.Ft int +.Fn ibask "int handle" "int option" "int *retval" +.Ft int +.Fn ibbna "int handle" "char *bdname" +.Ft int +.Fn ibcac "int handle" "int v" +.Ft int +.Fn ibclr "int handle" +.Ft int +.Fn ibcmd "int handle" "void *buffer" "long cnt" +.Ft int +.Fn ibcmda "int handle" "void *buffer" "long cnt" +.Ft int +.Fn ibconfig "int handle" "int option" "int value" +.Ft int +.Fn ibdev "int boardID" "int pad" "int sad" "int tmo" "int eot" "int eos" +.Ft int +.Fn ibdiag "int handle" "void *buffer" "long cnt" +.Ft int +.Fn ibdma "int handle" "int v" +.Ft int +.Fn ibeos "int handle" "int eos" +.Ft int +.Fn ibeot "int handle" "int eot" +.Ft int +.Fn ibevent "int handle" "short *event" +.Ft int +.Fn ibfind "char *bdname" +.Ft int +.Fn ibgts "int handle" "int v" +.Ft int +.Fn ibist "int handle" "int v" +.Ft int +.Fn iblines "int handle" "short *lines" +.Ft int +.Fn ibllo "int handle" +.Ft int +.Fn ibln "int handle" "int padval" "int sadval" "short *listenflag" +.Ft int +.Fn ibloc "int handle" +.Ft int +.Fn ibonl "int handle" "int v" +.Ft int +.Fn ibpad "int handle" "int pad" +.Ft int +.Fn ibpct "int handle" +.Ft int +.Fn ibpoke "int handle" "int option" "int value" +.Ft int +.Fn ibppc "int handle" "int v" +.Ft int +.Fn ibrd "int handle" "void *buffer" "long cnt" +.Ft int +.Fn ibrda "int handle" "void *buffer" "long cnt" +.Ft int +.Fn ibrdf "int handle" "char *flname" +.Ft int +.Fn ibrdkey "int handle" "void 
*buffer" "int cnt" +.Ft int +.Fn ibrpp "int handle" "char *ppr" +.Ft int +.Fn ibrsc "int handle" "int v" +.Ft int +.Fn ibrsp "int handle" "char *spr" +.Ft int +.Fn ibrsv "int handle" "int v" +.Ft int +.Fn ibsad "int handle" "int sad" +.Ft int +.Fn ibsgnl "int handle" "int v" +.Ft int +.Fn ibsic "int handle" +.Ft int +.Fn ibsre "int handle" "int v" +.Ft int +.Fn ibsrq "(*func) void)" +.Ft int +.Fn ibstop "int handle" +.Ft int +.Fn ibtmo "int handle" "int tmo" +.Ft int +.Fn ibtrap "int mask" "int mode" +.Ft int +.Fn ibtrg "int handle" +.Ft int +.Fn ibwait "int handle" "int mask" +.Ft int +.Fn ibwrt "int handle" "const void *buffer" "long cnt" +.Ft int +.Fn ibwrta "int handle" "const void *buffer" "long cnt" +.Ft int +.Fn ibwrtf "int handle" "const char *flname" +.Ft int +.Fn ibwrtkey "int handle" "const void *buffer" "int cnt" +.Ft int +.Fn ibxtrc "int handle" "void *buffer" "long cnt" +.Sh DESCRIPTION +The +.Nm +library provides access to the +.Xr gpib 4 +kernel devices. +.Ss Variable Description +The variable +.Dv ibcnt +contains the number of bytes transferred in the most recent call to +.Fn ibcmd , +.Fn ibrd , +or +.Fn ibwrt . +.Pp +The name +.Dv ibcntl +is an alias for +.Dv ibcnt , +provided for backwards compatibility. +.Pp +The variable +.Dv iberr +provides an error code for the most recent library call. +The possible error codes are: +.Bl -tag -offset indent -compact +.It EDVR +System error +.It ECIC +Not Active Controller +.It ENOL +Nobody listening +.It EADR +Controller not addressed +.It EARG +Invalid argument +.It ESAC +Not System Controller +.It EABO +I/O Aborted/Time out +.It ENEB +No such controller +.It EOIP +Async I/O in progress +.It ECAP +No such capability +.It EFSO +File system error +.It EBUS +Command byte xfer error +.It ESTB +Serial poll status byte lost +.It ESRQ +SRQ line stuck +.It ETAB +Table problem +.El +.Pp +The variable +.Dv ibsta +contains the controller status. +This is an ORed status value, with the following individual bit names: +.Bl -tag -offset indent -compact +.It ERR +Error +.It TIMO +Timeout +.It END +EOI/EOS +.It SRQI +SRQ +.It RQS +Device requests service +.It SPOLL +Serial Poll +.It EVENT +Event occured +.It CMPL +I/O complete +.It LOK +Lockout +.It REM +Remote +.It CIC +CIC +.It ATN +ATN +.It TACS +Talker +.It LACS +Listener +.It DTAS +Device trigger status +.It DCAS +Device clear state +.El +.Ss Function Description +.Pp +The function +.Fn ibdev +is used to open the GPIB device, and establish the parameters to +communicate with a particular bus device. The device is selected +by its primary address +.Fa pad , +a numerical value between 0 and 30, possibly additionally by its +secondary address +.Fa sad , +a numerical value between 96 and 126, or 0 to not use secondary +addressing. +The +.Fa tmo +value specifies the timeout to use when communicating with the device. +This can be any of the constants +.Dv TNONE , +.Dv T10us , +.Dv T30us , +.Dv T100us , +.Dv T300us , +.Dv T1ms , +.Dv T3ms , +.Dv T10ms , +.Dv T30ms , +.Dv T100ms , +.Dv T300ms , +.Dv T1s , +.Dv T3s , +.Dv T10s , +.Dv T30s , +.Dv T100s , +.Dv T300s , +or +.Dv T1000s . +The boolean parameter +.Fa eot +specifies whether the bus signal +.Li EOI +(end-or-identify) should be asserted when sending the last byte of a +message to the device. +Finally, the +.Fa eos +parameter determines whether any special character should be used to +identify the end of a device message when transferring messages on the +bus. 
+The lower 8 bits of +.Fa eos +are interpreted as an end-of-string character, +.Li EOS . +This character can be ORed with the following values: +.Bl -tag -compact -offset indent +.It Dv REOS +When receiving a message byte on the bus that matches the +.Li EOS +character, treat it as if the +.Li EOI +signal were asserted, and stop receiving. +.It Dv XEOS +When transmitting a message byte on the bus that matches the +.Li EOS +character, assert the +.Li EOI +bus signal at the same time, and stop sending. +.It Dv BIN +If set, include all 8 bits of the +.Li EOS +character in the comparison; if unset, compare only 7 bit ASCII +values. +.El +Passing 0 as +.Fa eos +will turn off any special character treatment, allowing for a fully +8-bit transparent communications channel to the device. +.Pp +The function +.Fn ibfind +is meant to find the +.Em board index +of a board identified by the name +.Fa bdname . +.Em This function is currently not implemented. +.Pp +All remaining functions take the handle returned by calling +.Fn ibdev +as their first argument +.Fa handle . +.Pp +The function +.Fn ibask +is used to query configuration values that have been set with +.Fn ibconfig . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibbna +is meant to change the access board for the given device to +a new one, named +.Fa bdname . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibcac +is used to become the active controller on the bus, by asserting the +.Li ATN +signal line. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibclr +is used to transmit a +.Em Selected Device Clear +command to the device. +.Pp +The function +.Fn ibcmd +is used to directly write +.Fa cnt +GPIB command bytes from a buffer starting at +.Fa buffer +to the device. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibcmda +does the same as +.Fn ibcmd +except it operates asynchronously, so it returns to the caller +immediately. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibconfig +is used to set certain configuration parameters. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibdiag +is obsolete, and not implemented. +.Pp +The function +.Fn ibdma +is used to enable or disable DMA transfers. +Parameter +.Fa v +is a boolean parameter indicating DMA transfers are to be used. +Depending on the hardware and operating system configuration, DMA +transfers might not be available for a particular access board. +.Pp +The function +.Fn ibeos +configures the end-of-string character. +See +.Fn ibdev +for an explanation. +.Pp +The function +.Fn ibeot +configures the assertion of the +.Li EOI +signal line when transmitting the last byte of a message; see +.Fn ibdev +for an explanation. +.Pp +The function +.Fn ibevent +is used to obtain an event from the board's event queue. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibgts +makes the current controller the standby controller, by deasserting +the +.Li ATN +signal line. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibist +sets the individual status bits of the controller to the value +.Fa v . +.Em This function is currently not implemented. +.Pp +The function +.Fn iblines +returns the status of the control and handshake bus lines into the +area pointed to by +.Fa lines . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibllo +is obsolete, and not implemented.
+.Pp +The function +.Fn ibln +checks for a listener at the primary address +.Fa padval +and the optional secondary address +.Fa sadval . +If a listener was found, the value pointed to by +.Fa listenflag +will be set to a non-zero value. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibloc +turns the device into local mode. +.Pp +The function +.Fn ibonl +is used to close or reinitialize a device handle. +If parameter +.Fa v +is passed as zero, the handle will be closed, and cannot be used +again. +If it is passed as a non-zero value, all parameters of the handle +will be returned to their defaults; +.Em this functionality is currently unsupported. +.Pp +The function +.Fn ibpad +is used to change the primary address of the device being communicated +with to +.Fa pad . +See +.Fn ibdev +for an explanation. +.Pp +The function +.Fn ibpct +is used to make the device associated with the handle the +controller-in-charge. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibpoke +is obsolete, and not implemented. +.Pp +The function +.Fn ibppc +is used to configure the parallel poll response to +.Fa v . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibrd +is used to receive +.Fa cnt +bytes from the device, and store them at the address passed as +.Fa buffer . +.Pp +The function +.Fn ibrda +behaves similarly to +.Fn ibrd +except it operates asynchronously, and returns immediately to the +caller. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibrdf +reads data from the device, and appends it to the file with the name +.Fa flname . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibrdkey +is obsolete, and not implemented. +.Pp +The function +.Fn ibrpp +performs a parallel poll, and stores the result at the location +pointed to by +.Fa ppr . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibrsc +makes the board specified by the handle the +.Em system controller +if the argument +.Fa v +is non-zero. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibrsp +conducts a serial poll, and stores the result in the byte pointed +to by +.Fa spr . +.Em This function is currently not implemented. +.Pp +The function +.Fn ibrsv +sets the serial poll response of the board to +.Fa v , +possibly requesting service from the controller if the SRQ bit (0x40) +is set. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibsad +changes the secondary address of the device being communicated with to +.Fa sad . +See +.Fn ibdev +for an explanation. +.Pp +The function +.Fn ibsgnl +is obsolete, and not implemented. +.Pp +The function +.Fn ibsic +asserts the +.Em Interface Clear (IFC) +signal line on the bus for at least 100 microseconds. +This will make all devices attached to the bus unlisten and untalk. +This function should only be executed on the system controller. +.Pp +The function +.Fn ibsre +asserts the +.Em Remote Enable (REN) +signal line on the bus if argument +.Fa v +is non-zero, or deasserts it otherwise. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibsrq +is obsolete, and not implemented. +.Pp +The function +.Fn ibstop +stops or aborts any asynchronous I/O operation. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibtmo +reconfigures the communication timeout. +See +.Fn ibdev +for an explanation. +.Pp +The function +.Fn ibtrap +is obsolete, and not implemented.
+.Pp +The function +.Fn ibtrg +sends a +.Em Group Execute Trigger (GET) +command to the device. +.Pp +The function +.Fn ibwait +waits for a status condition as specified by +.Fa mask . +If +.Fa mask +is given as zero, it returns immediately. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibwrt +is used to send +.Fa cnt +bytes to the device, starting at the address pointed to by +.Fa buffer . +.Pp +The function +.Fn ibwrta +performs the same operation as +.Fn ibwrt +in an asynchronous way, returning immediately to the caller. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibwrtf +opens the file named by +.Fa flname , +and sends its contents to the device. +.Em This function is currently not implemented. +.Pp +The function +.Fn ibwrtkey +is obsolete, and not implemented. +.Pp +The function +.Fn ibxtrc +is obsolete, and not implemented. +.Sh RETURN VALUES +The function +.Fn ibdev +returns a handle to be used for the remaining functions. +Upon failure, -1 is returned. +.Pp +All other functions return the value of the variable +.Dv ibsta . +.Sh DIAGNOSTICS +None. +.Sh COMPATIBILITY +The +.Nm +library tries to be compatible with the Linux GPIB library, +which in turn appears to be compatible with the GPIB library +shipped by National Instruments. +.Sh ERRORS +Errors in the functions above might set +.Dv errno +to one of these values: +.Bl -tag -width Er +.It Bq Er ENOENT +No such file or directory. +.It Bq Er EIO +Input/output error. +.It Bq Er ENXIO +Device not configured. +.It Bq Er E2BIG +Argument list too long. +.It Bq Er ENOMEM +Cannot allocate memory. +.It Bq Er EACCES +Permission denied. +.It Bq Er EFAULT +Bad address. +.It Bq Er EBUSY +Device busy. +.It Bq Er EINVAL +Invalid argument. +.It Bq Er ENFILE +Too many open files in system. +.It Bq Er EMFILE +Too many open files. +.It Bq Er EOPNOTSUPP +Operation not supported. +.El +.Sh SEE ALSO +.Xr gpib 4 +.Sh HISTORY +The +.Nm +library was written by +.An Poul-Henning Kamp +and first appeared in +.Fx 5.4 . +.Sh AUTHORS +This manual page was written by +.An J\(:org Wunsch . +.Sh BUGS +Currently, the library can only handle a single +.Xr gpib 4 +device with instance number 0. +.Pp +Many functions are currently not implemented; see above for details. -- cgit v1.1 From 6815fe2938e067978f7c3fc931e213b3a5ae7f05 Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 7 Feb 2010 04:27:18 +0000 Subject: MFC r202916: Make strsignal(3) thread-safe.
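The diff below drops strsignal(3)'s single static result buffer in favor of per-thread storage, using libc's internal thr_* wrappers. In portable POSIX terms the underlying pattern looks roughly like this (a sketch, not the libc code itself):

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_key_t buf_key;
    static pthread_once_t buf_once = PTHREAD_ONCE_INIT;

    static void
    buf_keycreate(void)
    {
            /* free() reclaims each thread's buffer at thread exit. */
            (void)pthread_key_create(&buf_key, free);
    }

    /* Return this thread's scratch buffer, allocating on first use. */
    static char *
    get_thread_buf(size_t size)
    {
            char *buf;

            if (pthread_once(&buf_once, buf_keycreate) != 0)
                    return (NULL);
            if ((buf = pthread_getspecific(buf_key)) == NULL) {
                    if ((buf = malloc(size)) == NULL)
                            return (NULL);
                    if (pthread_setspecific(buf_key, buf) != 0) {
                            free(buf);
                            return (NULL);
                    }
            }
            return (buf);
    }

As the actual change shows, libc additionally short-circuits the main thread to a static buffer and falls back to a second static buffer when allocation fails, so the function still returns something usable under memory pressure.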
--- lib/libc/string/strsignal.c | 52 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/string/strsignal.c b/lib/libc/string/strsignal.c index e4267a3..c51f34d 100644 --- a/lib/libc/string/strsignal.c +++ b/lib/libc/string/strsignal.c @@ -33,22 +33,64 @@ static char sccsid[] = "@(#)strerror.c 8.1 (Berkeley) 6/4/93"; #include __FBSDID("$FreeBSD$"); +#include "namespace.h" #if defined(NLS) #include #endif - #include #include +#include #include #include +#include "reentrant.h" +#include "un-namespace.h" #define UPREFIX "Unknown signal" +static char sig_ebuf[NL_TEXTMAX]; +static char sig_ebuf_err[NL_TEXTMAX]; +static once_t sig_init_once = ONCE_INITIALIZER; +static thread_key_t sig_key; +static int sig_keycreated = 0; + +static void +sig_keycreate(void) +{ + sig_keycreated = (thr_keycreate(&sig_key, free) == 0); +} + +static char * +sig_tlsalloc(void) +{ + char *ebuf = NULL; + + if (thr_main() != 0) + ebuf = sig_ebuf; + else { + if (thr_once(&sig_init_once, sig_keycreate) != 0 || + !sig_keycreated) + goto thr_err; + if ((ebuf = thr_getspecific(sig_key)) == NULL) { + if ((ebuf = malloc(sizeof(sig_ebuf))) == NULL) + goto thr_err; + if (thr_setspecific(sig_key, ebuf) != 0) { + free(ebuf); + ebuf = NULL; + goto thr_err; + } + } + } +thr_err: + if (ebuf == NULL) + ebuf = sig_ebuf_err; + return (ebuf); +} + /* XXX: negative 'num' ? (REGR) */ char * strsignal(int num) { - static char ebuf[NL_TEXTMAX]; + char *ebuf; char tmp[20]; size_t n; int signum; @@ -60,6 +102,8 @@ strsignal(int num) catd = catopen("libc", NL_CAT_LOCALE); #endif + ebuf = sig_tlsalloc(); + if (num > 0 && num < sys_nsig) { n = strlcpy(ebuf, #if defined(NLS) @@ -67,7 +111,7 @@ strsignal(int num) #else sys_siglist[num], #endif - sizeof(ebuf)); + sizeof(sig_ebuf)); } else { n = strlcpy(ebuf, #if defined(NLS) @@ -75,7 +119,7 @@ strsignal(int num) #else UPREFIX, #endif - sizeof(ebuf)); + sizeof(sig_ebuf)); } signum = num; -- cgit v1.1 From 4d2b1b10d9a30417eb60d614c3dab6805613a24d Mon Sep 17 00:00:00 2001 From: dougb Date: Sun, 7 Feb 2010 20:26:45 +0000 Subject: MFC 202960: Copyright-only changes to generated files as part of the 9.6.1-P3 update --- lib/bind/dns/code.h | 2 +- lib/bind/dns/dns/enumclass.h | 2 +- lib/bind/dns/dns/enumtype.h | 2 +- lib/bind/dns/dns/rdatastruct.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/bind/dns/code.h b/lib/bind/dns/code.h index db8d8c5..264fc11 100644 --- a/lib/bind/dns/code.h +++ b/lib/bind/dns/code.h @@ -1,7 +1,7 @@ /* $FreeBSD$ */ /* - * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1998-2003 Internet Software Consortium. * * Permission to use, copy, modify, and distribute this software for any diff --git a/lib/bind/dns/dns/enumclass.h b/lib/bind/dns/dns/enumclass.h index b1e363f..e33d4c4 100644 --- a/lib/bind/dns/dns/enumclass.h +++ b/lib/bind/dns/dns/enumclass.h @@ -1,7 +1,7 @@ /* $FreeBSD$ */ /* - * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1998-2003 Internet Software Consortium. 
 *
 * Permission to use, copy, modify, and distribute this software for any
diff --git a/lib/bind/dns/dns/enumtype.h b/lib/bind/dns/dns/enumtype.h
index 74a6db7..4ad3358 100644
--- a/lib/bind/dns/dns/enumtype.h
+++ b/lib/bind/dns/dns/enumtype.h
@@ -1,7 +1,7 @@
 /* $FreeBSD$ */

 /*
- * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
  * Copyright (C) 1998-2003 Internet Software Consortium.
  *
  * Permission to use, copy, modify, and distribute this software for any
diff --git a/lib/bind/dns/dns/rdatastruct.h b/lib/bind/dns/dns/rdatastruct.h
index d723941..94f57b8 100644
--- a/lib/bind/dns/dns/rdatastruct.h
+++ b/lib/bind/dns/dns/rdatastruct.h
@@ -1,7 +1,7 @@
 /* $FreeBSD$ */

 /*
- * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
  * Copyright (C) 1998-2003 Internet Software Consortium.
  *
  * Permission to use, copy, modify, and distribute this software for any
--
cgit v1.1


From c39123cbe1bc4b4f762bd1e2b62779a329c24505 Mon Sep 17 00:00:00 2001
From: emaste
Date: Tue, 9 Feb 2010 13:07:32 +0000
Subject: MFC r203077: Add missing return, in a rare case where we can't
 allocate memory in deallocate.

Submitted by: Ryan Stone (rysto32 at gmail dot com)
Approved by: jasone
---
 lib/libc/stdlib/malloc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c
index 80e26e6..8bc2c85 100644
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@@ -3861,6 +3861,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 			arena_dalloc_small(arena, chunk, ptr, mapelm);
 			malloc_spin_unlock(&arena->lock);
+			return;
 		}
 		mag_rack = rack;
 	}
--
cgit v1.1


From 6d70c92b6e47fbf677df3874a9d4a40f1a8e9ced Mon Sep 17 00:00:00 2001
From: gavin
Date: Sat, 13 Feb 2010 10:26:00 +0000
Subject: Merge r203393,r203395 from head:

The multiplicand a = 0x5deece66d = 25214903917, not 0xfdeece66d.
This bug in the man page has gone unnoticed for over 15 years!

PR: docs/143461
Submitted by: Jeremy Huddleston jeremyhu apple.com
---
 lib/libc/gen/rand48.3 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/gen/rand48.3 b/lib/libc/gen/rand48.3
index a703ba0..8d325a1 100644
--- a/lib/libc/gen/rand48.3
+++ b/lib/libc/gen/rand48.3
@@ -12,7 +12,7 @@
 .\" @(#)rand48.3 V1.0 MB 8 Oct 1993
 .\" $FreeBSD$
 .\"
-.Dd October 8, 1993
+.Dd February 2, 2010
 .Dt RAND48 3
 .Os
 .Sh NAME
@@ -57,7 +57,7 @@ The particular formula employed is
 r(n+1) = (a * r(n) + c) mod m
 where the default values are
-for the multiplicand a = 0xfdeece66d = 25214903917 and
+for the multiplicand a = 0x5deece66d = 25214903917 and
 the addend c = 0xb = 11.
 The modulo is always fixed at m = 2 ** 48.
 r(n) is called the seed of the random number generator.
--
cgit v1.1


From 2107ded34e22813b72aeb97cc608029162d93758 Mon Sep 17 00:00:00 2001
From: kib
Date: Wed, 17 Feb 2010 09:09:12 +0000
Subject: MFC r203441: Placate new binutils, by using 16-bit %ax instead of
 32-bit %eax as an argument for fnstsw.

Explicitly specify sizes for the XMM control and status word and X87
control and status words.
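The point of the change, reduced to a hedged standalone sketch (this is not the msun code itself): fnstsw stores exactly 16 bits, so newer assemblers reject a 32-bit register operand, and binding the result to a uint16_t lets the compiler pick %ax or a 16-bit memory slot.

#include <stdint.h>

static inline uint16_t
read_x87_status(void)
{
        uint16_t sw;

        /* The "a" constraint allows %ax and "m" a memory operand;
         * with a 32-bit int here the old code effectively asked for
         * %eax, which newer binutils refuses for fnstsw. */
        __asm __volatile("fnstsw %0" : "=am" (sw));
        return (sw);
}

The diffs below apply the same idea throughout fenv.c/fenv.h by retyping the status and control words as __uint16_t and the MXCSR as __uint32_t.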
--- lib/msun/amd64/fenv.c | 11 +++++++---- lib/msun/amd64/fenv.h | 13 ++++++++----- lib/msun/i387/fenv.c | 15 +++++++++------ lib/msun/i387/fenv.h | 17 ++++++++++------- 4 files changed, 34 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/msun/amd64/fenv.c b/lib/msun/amd64/fenv.c index cf07c8b..e126070 100644 --- a/lib/msun/amd64/fenv.c +++ b/lib/msun/amd64/fenv.c @@ -86,7 +86,7 @@ fegetenv(fenv_t *envp) int feholdexcept(fenv_t *envp) { - int mxcsr; + __uint32_t mxcsr; __stmxcsr(&mxcsr); __fnstenv(&envp->__x87); @@ -101,7 +101,8 @@ feholdexcept(fenv_t *envp) int feupdateenv(const fenv_t *envp) { - int mxcsr, status; + __uint32_t mxcsr; + __uint16_t status; __fnstsw(&status); __stmxcsr(&mxcsr); @@ -113,7 +114,8 @@ feupdateenv(const fenv_t *envp) int __feenableexcept(int mask) { - int mxcsr, control, omask; + __uint32_t mxcsr, omask; + __uint16_t control; mask &= FE_ALL_EXCEPT; __fnstcw(&control); @@ -129,7 +131,8 @@ __feenableexcept(int mask) int __fedisableexcept(int mask) { - int mxcsr, control, omask; + __uint32_t mxcsr, omask; + __uint16_t control; mask &= FE_ALL_EXCEPT; __fnstcw(&control); diff --git a/lib/msun/amd64/fenv.h b/lib/msun/amd64/fenv.h index b4d7379..875664a 100644 --- a/lib/msun/amd64/fenv.h +++ b/lib/msun/amd64/fenv.h @@ -110,7 +110,8 @@ feclearexcept(int __excepts) static __inline int fegetexceptflag(fexcept_t *__flagp, int __excepts) { - int __mxcsr, __status; + __uint32_t __mxcsr; + __uint16_t __status; __stmxcsr(&__mxcsr); __fnstsw(&__status); @@ -124,7 +125,8 @@ int feraiseexcept(int __excepts); static __inline int fetestexcept(int __excepts) { - int __mxcsr, __status; + __uint32_t __mxcsr; + __uint16_t __status; __stmxcsr(&__mxcsr); __fnstsw(&__status); @@ -134,7 +136,7 @@ fetestexcept(int __excepts) static __inline int fegetround(void) { - int __control; + __uint16_t __control; /* * We assume that the x87 and the SSE unit agree on the @@ -149,7 +151,8 @@ fegetround(void) static __inline int fesetround(int __round) { - int __mxcsr, __control; + __uint32_t __mxcsr; + __uint16_t __control; if (__round & ~_ROUND_MASK) return (-1); @@ -197,7 +200,7 @@ int fedisableexcept(int __mask); static __inline int fegetexcept(void) { - int __control; + __uint16_t __control; /* * We assume that the masks for the x87 and the SSE unit are diff --git a/lib/msun/i387/fenv.c b/lib/msun/i387/fenv.c index ace2d32..5c996b3 100644 --- a/lib/msun/i387/fenv.c +++ b/lib/msun/i387/fenv.c @@ -87,7 +87,7 @@ int fesetexceptflag(const fexcept_t *flagp, int excepts) { fenv_t env; - int mxcsr; + __uint32_t mxcsr; __fnstenv(&env); env.__status &= ~excepts; @@ -117,7 +117,7 @@ feraiseexcept(int excepts) int fegetenv(fenv_t *envp) { - int mxcsr; + __uint32_t mxcsr; __fnstenv(envp); /* @@ -135,7 +135,7 @@ fegetenv(fenv_t *envp) int feholdexcept(fenv_t *envp) { - int mxcsr; + __uint32_t mxcsr; __fnstenv(envp); __fnclex(); @@ -152,7 +152,8 @@ feholdexcept(fenv_t *envp) int feupdateenv(const fenv_t *envp) { - int mxcsr, status; + __uint32_t mxcsr; + __uint16_t status; __fnstsw(&status); if (__HAS_SSE()) @@ -167,7 +168,8 @@ feupdateenv(const fenv_t *envp) int __feenableexcept(int mask) { - int mxcsr, control, omask; + __uint32_t mxcsr, omask; + __uint16_t control; mask &= FE_ALL_EXCEPT; __fnstcw(&control); @@ -188,7 +190,8 @@ __feenableexcept(int mask) int __fedisableexcept(int mask) { - int mxcsr, control, omask; + __uint32_t mxcsr, omask; + __uint16_t control; mask &= FE_ALL_EXCEPT; __fnstcw(&control); diff --git a/lib/msun/i387/fenv.h b/lib/msun/i387/fenv.h index c4eab02..cc58a92 100644 
--- a/lib/msun/i387/fenv.h +++ b/lib/msun/i387/fenv.h @@ -114,7 +114,7 @@ static __inline int feclearexcept(int __excepts) { fenv_t __env; - int __mxcsr; + __uint32_t __mxcsr; if (__excepts == FE_ALL_EXCEPT) { __fnclex(); @@ -134,7 +134,8 @@ feclearexcept(int __excepts) static __inline int fegetexceptflag(fexcept_t *__flagp, int __excepts) { - int __mxcsr, __status; + __uint32_t __mxcsr; + __uint16_t __status; __fnstsw(&__status); if (__HAS_SSE()) @@ -151,7 +152,8 @@ int feraiseexcept(int __excepts); static __inline int fetestexcept(int __excepts) { - int __mxcsr, __status; + __uint32_t __mxcsr; + __uint16_t __status; __fnstsw(&__status); if (__HAS_SSE()) @@ -164,7 +166,7 @@ fetestexcept(int __excepts) static __inline int fegetround(void) { - int __control; + __uint16_t __control; /* * We assume that the x87 and the SSE unit agree on the @@ -179,7 +181,8 @@ fegetround(void) static __inline int fesetround(int __round) { - int __mxcsr, __control; + __uint32_t __mxcsr; + __uint16_t __control; if (__round & ~_ROUND_MASK) return (-1); @@ -206,7 +209,7 @@ static __inline int fesetenv(const fenv_t *__envp) { fenv_t __env = *__envp; - int __mxcsr; + __uint32_t __mxcsr; __mxcsr = __get_mxcsr(__env); __set_mxcsr(__env, 0xffffffff); @@ -234,7 +237,7 @@ int fedisableexcept(int __mask); static __inline int fegetexcept(void) { - int __control; + __uint16_t __control; /* * We assume that the masks for the x87 and the SSE unit are -- cgit v1.1 From 7d0f076c54c2fecb825ddadf2950088886baf938 Mon Sep 17 00:00:00 2001 From: brucec Date: Thu, 18 Feb 2010 10:51:31 +0000 Subject: MFC r203322: Fix typo of ENOTCONN. Add missing RETURN VALUES section in sctp_opt_info(3). Approved by: rrs (mentor) --- lib/libc/net/sctp_opt_info.3 | 2 ++ lib/libc/net/sctp_recvmsg.3 | 2 +- lib/libc/net/sctp_send.3 | 2 +- lib/libc/net/sctp_sendmsg.3 | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/sctp_opt_info.3 b/lib/libc/net/sctp_opt_info.3 index 581a220..f010613 100644 --- a/lib/libc/net/sctp_opt_info.3 +++ b/lib/libc/net/sctp_opt_info.3 @@ -87,6 +87,8 @@ socket options. .Dv SCTP_PEER_AUTH_CHUNKS .Pp .Dv SCTP_LOCAL_AUTH_CHUNKS +.Sh RETURN VALUES +The call returns 0 on success and -1 upon error. .Sh ERRORS The .Fn sctp_opt_info diff --git a/lib/libc/net/sctp_recvmsg.3 b/lib/libc/net/sctp_recvmsg.3 index f874880..a5926bf 100644 --- a/lib/libc/net/sctp_recvmsg.3 +++ b/lib/libc/net/sctp_recvmsg.3 @@ -269,7 +269,7 @@ This generally indicates that the interface has stopped sending, but may be caused by transient congestion. .It Bq Er EHOSTUNREACH The remote host was unreachable. -.It Bq Er ENOTCON +.It Bq Er ENOTCONN On a one-to-one style socket no association exists. .It Bq Er ECONNRESET An abort was received by the stack while the user was diff --git a/lib/libc/net/sctp_send.3 b/lib/libc/net/sctp_send.3 index f6f0c95..010430a 100644 --- a/lib/libc/net/sctp_send.3 +++ b/lib/libc/net/sctp_send.3 @@ -319,7 +319,7 @@ This generally indicates that the interface has stopped sending, but may be caused by transient congestion. .It Bq Er EHOSTUNREACH The remote host was unreachable. -.It Bq Er ENOTCON +.It Bq Er ENOTCONN On a one-to-one style socket no association exists. 
.It Bq Er ECONNRESET An abort was received by the stack while the user was diff --git a/lib/libc/net/sctp_sendmsg.3 b/lib/libc/net/sctp_sendmsg.3 index 0c5fa02..52faa31 100644 --- a/lib/libc/net/sctp_sendmsg.3 +++ b/lib/libc/net/sctp_sendmsg.3 @@ -296,7 +296,7 @@ This generally indicates that the interface has stopped sending, but may be caused by transient congestion. .It Bq Er EHOSTUNREACH The remote host was unreachable. -.It Bq Er ENOTCON +.It Bq Er ENOTCONN On a one-to-one style socket no association exists. .It Bq Er ECONNRESET An abort was received by the stack while the user was -- cgit v1.1 From 927a538c83b04beaf071c9350ee88de781a66a46 Mon Sep 17 00:00:00 2001 From: brucec Date: Thu, 18 Feb 2010 10:55:42 +0000 Subject: MFC r203323: Remove extra semicolon. Approved by: rrs (mentor) --- lib/libc/net/sctp_sys_calls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/net/sctp_sys_calls.c b/lib/libc/net/sctp_sys_calls.c index 715fcbd..d876293 100644 --- a/lib/libc/net/sctp_sys_calls.c +++ b/lib/libc/net/sctp_sys_calls.c @@ -784,7 +784,7 @@ sctp_sendx(int sd, const void *msg, size_t msg_len, free(buf); if (ret != 0) { if (errno == EALREADY) { - no_end_cx = 1;; + no_end_cx = 1; goto continue_send; } return (ret); -- cgit v1.1 From 55df83ef97851d7b62a966a05e14171862779651 Mon Sep 17 00:00:00 2001 From: ru Date: Thu, 18 Feb 2010 16:52:49 +0000 Subject: MFC: r203918: Unbreak makefiles by removing mentions of DEFAULT_THREAD_LIB. --- lib/libc_r/Makefile | 16 ---------------- lib/libkse/Makefile | 20 -------------------- 2 files changed, 36 deletions(-) (limited to 'lib') diff --git a/lib/libc_r/Makefile b/lib/libc_r/Makefile index 0b286f2..a424876 100644 --- a/lib/libc_r/Makefile +++ b/lib/libc_r/Makefile @@ -10,10 +10,6 @@ .include -.if ${DEFAULT_THREAD_LIB} == "libc_r" && ${SHLIBDIR} == "/usr/lib" -SHLIBDIR= /lib -.endif - LIB=c_r SHLIB_MAJOR= 7 CFLAGS+=-DPTHREAD_KERNEL @@ -32,16 +28,4 @@ PRECIOUSLIB= .include "${.CURDIR}/uthread/Makefile.inc" .include "${.CURDIR}/sys/Makefile.inc" -.if ${DEFAULT_THREAD_LIB} == "libc_r" -.if ${MK_INSTALLLIB} != "no" -SYMLINKS+=lib${LIB}.a ${LIBDIR}/libpthread.a -.endif -.if !defined(NO_PIC) -SYMLINKS+=lib${LIB}.so ${LIBDIR}/libpthread.so -.endif -.if ${MK_PROFILE} != "no" -SYMLINKS+=lib${LIB}_p.a ${LIBDIR}/libpthread_p.a -.endif -.endif - .include diff --git a/lib/libkse/Makefile b/lib/libkse/Makefile index 9fe09c5..c4e2b5a 100644 --- a/lib/libkse/Makefile +++ b/lib/libkse/Makefile @@ -10,15 +10,7 @@ .include -.if ${DEFAULT_THREAD_LIB} == "libkse" || ${MK_LIBTHR} == "no" -LIB=kse -.if ${SHLIBDIR} == "/usr/lib" -SHLIBDIR= /lib -.endif -.else SHLIB=kse -.endif - SHLIB_MAJOR= 4 CFLAGS+=-DPTHREAD_KERNEL CFLAGS+=-I${.CURDIR}/../libc/include -I${.CURDIR}/thread \ @@ -51,16 +43,4 @@ PRECIOUSLIB= .include "${.CURDIR}/sys/Makefile.inc" .include "${.CURDIR}/thread/Makefile.inc" -.if ${DEFAULT_THREAD_LIB} == "libkse" || ${MK_LIBTHR} == "no" -.if ${MK_INSTALLLIB} != "no" -SYMLINKS+=lib${LIB}.a ${LIBDIR}/libpthread.a -.endif -.if !defined(NO_PIC) -SYMLINKS+=lib${LIB}.so ${LIBDIR}/libpthread.so -.endif -.if ${MK_PROFILE} != "no" -SYMLINKS+=lib${LIB}_p.a ${LIBDIR}/libpthread_p.a -.endif -.endif - .include -- cgit v1.1 From e345edec0f48f330d3c572936a468e2a5552e256 Mon Sep 17 00:00:00 2001 From: ru Date: Mon, 22 Feb 2010 15:58:10 +0000 Subject: MFC: r204008: realloc() with a proper amount of memory. 
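The one-character bug class this commit fixes, as an editor's sketch (the struct below is a stand-in, not libjail's type): sizeof(jp) measures the pointer, while sizeof(*jp) measures the element being stored.

#include <stdlib.h>

struct param { char name[64]; void *value; };

int
main(void)
{
        struct param *jp = NULL;
        size_t nlist = 8;

        /* WRONG: allocates nlist * sizeof(pointer) bytes, e.g. only
         * 64 bytes on LP64, far too small for 8 structs, so later
         * stores run past the end of the allocation. */
        jp = realloc(jp, nlist * sizeof(jp));

        /* RIGHT: allocates nlist * sizeof(struct param) bytes. */
        jp = realloc(jp, nlist * sizeof(*jp));

        free(jp);
        return (0);
}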
--- lib/libjail/jail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c index 39c88d0..c82cb11 100644 --- a/lib/libjail/jail.c +++ b/lib/libjail/jail.c @@ -191,7 +191,7 @@ jailparam_all(struct jailparam **jpp) /* Add the parameter to the list */ if (njp >= nlist) { nlist *= 2; - jp = realloc(jp, nlist * sizeof(jp)); + jp = realloc(jp, nlist * sizeof(*jp)); if (jp == NULL) { jailparam_free(jp, njp); return (-1); -- cgit v1.1 From ddfa9b895b20fc04a73aee44c52f2db7b47d48b6 Mon Sep 17 00:00:00 2001 From: mckusick Date: Fri, 26 Feb 2010 21:49:11 +0000 Subject: MFC of 203763, 203764, 203768, 203769, 203770, 203782, and 203784. These fixes correct a problem in the file system that treats large inode numbers as negative rather than unsigned. For a default (16K block) file system, this bug began to show up at a file system size above about 16Tb. These fixes also update newfs to ensure that it will never create a filesystem with more than 2^32 inodes. They also update libufs, tunefs, and growfs so that they properly handle inode numbers as unsigned. Reported by: Scott Burns, John Kilburg, and Bruce Evans Followup by: Jeff Roberson PR: 133980 --- lib/libufs/cgroup.c | 2 +- lib/libufs/sblock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libufs/cgroup.c b/lib/libufs/cgroup.c index 2185682..28e5ea8 100644 --- a/lib/libufs/cgroup.c +++ b/lib/libufs/cgroup.c @@ -59,7 +59,7 @@ cgread1(struct uufsd *disk, int c) fs = &disk->d_fs; - if (c >= fs->fs_ncg) { + if ((unsigned)c >= fs->fs_ncg) { return (0); } ccg = fsbtodb(fs, cgtod(fs, c)) * disk->d_bsize; diff --git a/lib/libufs/sblock.c b/lib/libufs/sblock.c index c9125f2..8986290 100644 --- a/lib/libufs/sblock.c +++ b/lib/libufs/sblock.c @@ -93,7 +93,7 @@ int sbwrite(struct uufsd *disk, int all) { struct fs *fs; - int i; + unsigned i; ERROR(disk, NULL); -- cgit v1.1 From 05b666175c3574ab196b72aa01279fbccde0db29 Mon Sep 17 00:00:00 2001 From: delphij Date: Tue, 2 Mar 2010 01:56:55 +0000 Subject: MFC x86emu/x86bios emulator and make previously i386 only dpms and vesa framebuffer driver, etc. work on FreeBSD/amd64. A significant amount of improvements were done by jkim@ during the recent months to make vesa(4) work better, over the initial code import. This work is based on OpenBSD's x86emu implementation and contributed by paradox and swell.k at gmail com. Hopefully I have stolen all their work to 8-STABLE :) All bugs in this commit are mine, as usual. --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 7cacb70..00b1c60 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -112,6 +112,7 @@ _libsmb= libsmb _libncp= libncp .endif _libsmb= libsmb +_libvgl= libvgl .endif .if ${MACHINE_ARCH} == "powerpc" -- cgit v1.1 From 4d425cbc0262bb55ea0f0cf425f77b655b5cb044 Mon Sep 17 00:00:00 2001 From: brooks Date: Wed, 3 Mar 2010 21:47:25 +0000 Subject: MFC r201959 Use the correct types to store uids and gids in the credential cache and eliminate an inappropriate use of NGROUPS. 
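Why the field types matter, as a hedged standalone example: any UID above 32767 is mangled when squeezed into a 16-bit signed field, which is what the old struct bsdcred did.

#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uid_t uid = 100000;             /* a perfectly valid UID */
        short old_field = (short)uid;   /* the old cached-credential type */

        /* On typical two's-complement systems this prints
         * "100000 -> -31072": the credential cache would match the
         * wrong user.  uid_t/gid_t preserve the full value. */
        printf("%ju -> %d\n", (uintmax_t)uid, old_field);
        return (0);
}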
--- lib/libc/rpc/svc_auth_des.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/rpc/svc_auth_des.c b/lib/libc/rpc/svc_auth_des.c index 84f1e19..de4d1b4 100644 --- a/lib/libc/rpc/svc_auth_des.c +++ b/lib/libc/rpc/svc_auth_des.c @@ -449,10 +449,10 @@ cache_spot(key, name, timestamp) #define INVALID -1 /* grouplen, if cache entry is invalid */ struct bsdcred { - short uid; /* cached uid */ - short gid; /* cached gid */ - short grouplen; /* length of cached groups */ - short groups[NGROUPS]; /* cached groups */ + uid_t uid; /* cached uid */ + gid_t gid; /* cached gid */ + int grouplen; /* length of cached groups */ + gid_t groups[NGRPS]; /* cached groups */ }; /* -- cgit v1.1 From 0a48f6a9f77f976420b17a5d1d10ff4e4ac409c8 Mon Sep 17 00:00:00 2001 From: kib Date: Thu, 4 Mar 2010 07:12:44 +0000 Subject: MFC r204307: Make pause(3) implementation not depended on the legacy sigcompat.c interfaces. Do not block rt signals during and after pause(3) calls. Use private libc namespace to call proper methods. --- lib/libc/gen/pause.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/gen/pause.c b/lib/libc/gen/pause.c index 86d3643..cb0fa97 100644 --- a/lib/libc/gen/pause.c +++ b/lib/libc/gen/pause.c @@ -33,8 +33,10 @@ static char sccsid[] = "@(#)pause.c 8.1 (Berkeley) 6/4/93"; #include __FBSDID("$FreeBSD$"); +#include "namespace.h" #include #include +#include "un-namespace.h" /* * Backwards compatible pause. @@ -42,7 +44,11 @@ __FBSDID("$FreeBSD$"); int __pause() { - return sigpause(sigblock(0L)); + sigset_t oset; + + if (_sigprocmask(SIG_BLOCK, NULL, &oset) == -1) + return (-1); + return (_sigsuspend(&oset)); } __weak_reference(__pause, pause); __weak_reference(__pause, _pause); -- cgit v1.1 From e8bb8a0692a40f30fbf7cca11e49f09130b585c5 Mon Sep 17 00:00:00 2001 From: marcel Date: Sun, 7 Mar 2010 00:07:00 +0000 Subject: MFC revs 203696, 203783: Add PT_VM_TIMESTAMP and PT_VM_ENTRY so that the tracing process can obtain the memory map of the traced process. Requested by: kib@ --- lib/libc/sys/ptrace.2 | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2 index 9d8f550..8265fb6 100644 --- a/lib/libc/sys/ptrace.2 +++ b/lib/libc/sys/ptrace.2 @@ -2,7 +2,7 @@ .\" $NetBSD: ptrace.2,v 1.2 1995/02/27 12:35:37 cgd Exp $ .\" .\" This file is in the public domain. -.Dd March 27, 2009 +.Dd February 11, 2010 .Dt PTRACE 2 .Os .Sh NAME @@ -327,6 +327,68 @@ This request will trace the specified process on each system call exit. .It PT_SYSCALL This request will trace the specified process on each system call entry and exit. +.It PT_VM_TIMESTAMP +This request returns the generation number or timestamp of the memory map of +the traced process as the return value from +.Fn ptrace . +This provides a low-cost way for the tracing process to determine if the +VM map changed since the last time this request was made. +.It PT_VM_ENTRY +This request is used to iterate over the entries of the VM map of the traced +process. 
+The +.Fa addr +argument specifies a pointer to a +.Vt "struct ptrace_vm_entry" , +which is defined as follows: +.Bd -literal +struct ptrace_vm_entry { + int pve_entry; + int pve_timestamp; + u_long pve_start; + u_long pve_end; + u_long pve_offset; + u_int pve_prot; + u_int pve_pathlen; + long pve_fileid; + uint32_t pve_fsid; + char *pve_path; +}; +.Ed +.Pp +The first entry is returned by setting +.Va pve_entry +to zero. +Subsequent entries are returned by leaving +.Va pve_entry +unmodified from the value returned by previous requests. +The +.Va pve_timestamp +field can be used to detect changes to the VM map while iterating over the +entries. +The tracing process can then take appropriate action, such as restarting. +By setting +.Va pve_pathlen +to a non-zero value on entry, the pathname of the backing object is returned +in the buffer pointed to by +.Va pve_path , +provided the entry is backed by a vnode. +The +.Va pve_pathlen +field is updated with the actual length of the pathname (including the +terminating null character). +The +.Va pve_offset +field is the offset within the backing object at which the range starts. +The range is located in the VM space at +.Va pve_start +and extends up to +.Va pve_end +(inclusive). +.Pp +The +.Fa data +argument is ignored. .El .Pp Additionally, machine-specific requests can exist. @@ -376,6 +438,11 @@ or .Dv PT_SETDBREGS was attempted on a process with no valid register set. (This is normally true only of system processes.) +.It +.Dv PT_VM_ENTRY +was given an invalid value for +.Fa pve_entry . +This can also be caused by changes to the VM map of the process. .El .It Bq Er EBUSY .Bl -bullet -compact @@ -405,6 +472,22 @@ on a process in violation of the requirements listed under .Dv PT_ATTACH above. .El +.It Bq Er ENOENT +.Bl -bullet -compact +.It +.Dv PT_VM_ENTRY +previously returned the last entry of the memory map. +No more entries exist. +.El +.It Bq Er ENAMETOOLONG +.Bl -bullet -compact +.It +.Dv PT_VM_ENTRY +cannot return the pathname of the backing object because the buffer is not big +enough. +.Fa pve_pathlen +holds the minimum buffer size required on return. +.El .El .Sh SEE ALSO .Xr execve 2 , -- cgit v1.1 From dc61101b9e948e19b6506d55ef3c0f7eaa40a0be Mon Sep 17 00:00:00 2001 From: gshapiro Date: Sun, 7 Mar 2010 02:02:07 +0000 Subject: MFC: Enable the use of nanosleep() instead of using pause() and signals. This Makefile change can be removed when the next version of sendmail is imported as it will have this built in to the FreeBSD conf.h section. --- lib/libsm/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/libsm/Makefile b/lib/libsm/Makefile index 38c7806..0fdcaa1 100644 --- a/lib/libsm/Makefile +++ b/lib/libsm/Makefile @@ -7,6 +7,7 @@ SENDMAIL_DIR=${.CURDIR}/../../contrib/sendmail CFLAGS+=-I${SENDMAIL_DIR}/src -I${SENDMAIL_DIR}/include -I. CFLAGS+=-DNEWDB -DNIS -DMAP_REGEX -DNOT_SENDMAIL +CFLAGS+=-DHAVE_NANOSLEEP .if ${MK_INET6_SUPPORT} != "no" CFLAGS+=-DNETINET6 -- cgit v1.1 From c4a13ea37fbc64339be45f9da5746f4c1faae737 Mon Sep 17 00:00:00 2001 From: jh Date: Fri, 12 Mar 2010 06:56:51 +0000 Subject: MFC r204447: In _gettemp(), check that the length of the path doesn't exceed MAXPATHLEN. Otherwise the path name (or part of it) may not fit to carrybuf causing a buffer overflow. 
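The failure mode, as a hypothetical sketch: hand mkstemp(3) a template longer than MAXPATHLEN and, with this check in place, the call fails cleanly with ENAMETOOLONG instead of overrunning the internal carry buffer.

#include <sys/param.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
        char tmpl[MAXPATHLEN + 16];

        /* Build a well-formed template (trailing Xs) that is
         * nonetheless longer than MAXPATHLEN. */
        memset(tmpl, 'a', sizeof(tmpl) - 7);
        strcpy(tmpl + sizeof(tmpl) - 7, "XXXXXX");

        if (mkstemp(tmpl) == -1 && errno == ENAMETOOLONG)
                printf("over-long template rejected, as expected\n");
        return (0);
}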
PR: bin/140228 --- lib/libc/stdio/mktemp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/libc/stdio/mktemp.c b/lib/libc/stdio/mktemp.c index 3f1e699..a30b930 100644 --- a/lib/libc/stdio/mktemp.c +++ b/lib/libc/stdio/mktemp.c @@ -116,6 +116,10 @@ _gettemp(path, doopen, domkdir, slen) for (trv = path; *trv != '\0'; ++trv) ; + if (trv - path >= MAXPATHLEN) { + errno = ENAMETOOLONG; + return (0); + } trv -= slen; suffp = trv; --trv; -- cgit v1.1 From 883fd46760684a12a08dee7a5e8c39ff4b1555b8 Mon Sep 17 00:00:00 2001 From: marius Date: Mon, 15 Mar 2010 18:32:57 +0000 Subject: MFC: r204974, r205002 - The OPSZ macro actually only does the right thing for int32 and int64 operands but not for double and extended double ones. Instead of trying to fix the macro just nuke it and unroll the loops in the correct way though as extended double operands turn out to be the only special case. - For FxTO{s,d,q} the source operand is int64 so rs2 has to be re-decoded after setting type accordingly as it's generally decoded using the low 2 bits as the type, which are 0 for these three instructions. - Similarly, in case of F{s,d,q}TOx the target is int64 so rd has to be re-decoded using not only the operand mask appropriate for int64 but also the correct register number encoding. - Use const where appropriate. - Wrap long lines. Submitted by: Peter Jeremy (partly) --- lib/libc/sparc64/fpu/fpu.c | 48 ++++++++++++++++++------------------------ lib/libc/sparc64/fpu/fpu_emu.h | 2 +- 2 files changed, 21 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/libc/sparc64/fpu/fpu.c b/lib/libc/sparc64/fpu/fpu.c index 74f07ec..7c8a3a9 100644 --- a/lib/libc/sparc64/fpu/fpu.c +++ b/lib/libc/sparc64/fpu/fpu.c @@ -97,7 +97,7 @@ __FBSDID("$FreeBSD$"); #define X8(x) X4(x),X4(x) #define X16(x) X8(x),X8(x) -static char cx_to_trapx[] = { +static const char cx_to_trapx[] = { X1(FSR_NX), X2(FSR_DZ), X4(FSR_UF), @@ -113,7 +113,8 @@ int __fpe_debug = 0; #endif #endif /* FPU_DEBUG */ -static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t, u_long); +static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t, + u_long); /* * Need to use an fpstate on the stack; we could switch, so we cannot safely @@ -169,7 +170,7 @@ __fpu_exception(struct utrapframe *uf) void __fpu_dumpfpn(struct fpn *fp) { - static char *class[] = { + static const char *const class[] = { "SNAN", "QNAN", "ZERO", "NUM", "INF" }; @@ -181,15 +182,11 @@ __fpu_dumpfpn(struct fpn *fp) } #endif -static int opmask[] = {0, 0, 1, 3}; +static const int opmask[] = {0, 0, 1, 3, 1}; /* Decode 5 bit register field depending on the type. */ #define RN_DECODE(tp, rn) \ - ((tp == FTYPE_DBL || tp == FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) & \ - ~opmask[tp]) - -/* Operand size in 32-bit registers. */ -#define OPSZ(tp) ((tp) == FTYPE_LNG ? 2 : (1 << (tp))) + ((tp) >= FTYPE_DBL ? INSFPdq_RN(rn) & ~opmask[tp] : (rn)) /* * Helper for forming the below case statements. Build only the op3 and opf @@ -209,8 +206,6 @@ static void __fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand, u_int32_t xor) { - u_int64_t tmp64; - int i; if (type == FTYPE_INT || type == FTYPE_SNG) __fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor); @@ -219,13 +214,10 @@ __fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand, * Need to use the double versions to be able to access * the upper 32 fp registers. 
*/ - for (i = 0; i < OPSZ(type); i += 2, rd += 2, rs2 += 2) { - tmp64 = __fpu_getreg64(rs2); - if (i == 0) - tmp64 = (tmp64 & ~((u_int64_t)nand << 32)) ^ - ((u_int64_t)xor << 32); - __fpu_setreg64(rd, tmp64); - } + __fpu_setreg64(rd, (__fpu_getreg64(rs2) & + ~((u_int64_t)nand << 32)) ^ ((u_int64_t)xor << 32)); + if (type == FTYPE_EXT) + __fpu_setreg64(rd + 2, __fpu_getreg64(rs2 + 2)); } } @@ -271,17 +263,17 @@ __fpu_cmpck(struct fpemu *fe) * multiply two integers this way. */ static int -__fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long tstate) +__fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, + u_long tstate) { struct fpn *fp; int opf, rs1, rs2, rd, type, mask, cx, cond; u_long reg, fsr; u_int space[4]; - int i; /* * `Decode' and execute instruction. Start with no exceptions. - * The type of any opf opcode is in the bottom two bits, so we + * The type of almost any OPF opcode is in the bottom two bits, so we * squish them out here. */ opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) | @@ -359,7 +351,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts __fpu_explode(fe, &fe->fe_f2, type, rs2); __fpu_compare(fe, 1, IF_F3_CC(insn)); return (__fpu_cmpck(fe)); - case FOP(INS2_FPop1, INSFP1_FMOV): /* these should all be pretty obvious */ + case FOP(INS2_FPop1, INSFP1_FMOV): __fpu_mov(fe, type, rd, rs2, 0, 0); return (0); case FOP(INS2_FPop1, INSFP1_FNEG): @@ -410,6 +402,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts case FOP(INS2_FPop1, INSFP1_FxTOd): case FOP(INS2_FPop1, INSFP1_FxTOq): type = FTYPE_LNG; + rs2 = RN_DECODE(type, IF_F3_RS2(insn)); __fpu_explode(fe, fp = &fe->fe_f1, type, rs2); /* sneaky; depends on instruction encoding */ type = (IF_F3_OPF(insn) >> 2) & 3; @@ -418,8 +411,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts case FOP(INS2_FPop1, INSFP1_FTOx): __fpu_explode(fe, fp = &fe->fe_f1, type, rs2); type = FTYPE_LNG; - mask = 1; /* needs 2 registers */ - rd = IF_F3_RD(insn) & ~mask; + rd = RN_DECODE(type, IF_F3_RD(insn)); break; case FOP(INS2_FPop1, INSFP1_FTOs): case FOP(INS2_FPop1, INSFP1_FTOd): @@ -457,10 +449,10 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts if (type == FTYPE_INT || type == FTYPE_SNG) __fpu_setreg(rd, space[0]); else { - for (i = 0; i < OPSZ(type); i += 2) { - __fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) | - space[i + 1]); - } + __fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]); + if (type == FTYPE_EXT) + __fpu_setreg64(rd + 2, + ((u_int64_t)space[2] << 32) | space[3]); } return (0); /* success */ } diff --git a/lib/libc/sparc64/fpu/fpu_emu.h b/lib/libc/sparc64/fpu/fpu_emu.h index de156e2..0d1d16d 100644 --- a/lib/libc/sparc64/fpu/fpu_emu.h +++ b/lib/libc/sparc64/fpu/fpu_emu.h @@ -140,7 +140,7 @@ struct fpn { #define FTYPE_SNG INSFP_s #define FTYPE_DBL INSFP_d #define FTYPE_EXT INSFP_q -#define FTYPE_LNG -1 +#define FTYPE_LNG 4 /* * Emulator state. -- cgit v1.1 From 9d42867c19941d56932e8cea0e02a9538e58cd55 Mon Sep 17 00:00:00 2001 From: jhb Date: Mon, 22 Mar 2010 15:52:26 +0000 Subject: MFC 204950,205020: Use thr_once() with once_t controls to initialize various thread_key_t objects used to provide per-thread storage in the RPC code. Almost all of these used double-checking with a dedicated mutex (tsd_lock) to do this before. However, that is not always safe with more relaxed memory orders. 
There were also other bugs, such as one in __rpc_createrr() that caused a new key to be allocated each time __rpc_createrr() was invoked. --- lib/libc/rpc/Symbol.map | 4 ---- lib/libc/rpc/clnt_simple.c | 22 ++++++++++++++++------ lib/libc/rpc/getnetconfig.c | 29 +++++++++++++++-------------- lib/libc/rpc/key_call.c | 20 +++++++++++++------- lib/libc/rpc/mt_misc.c | 27 +++++++++++++-------------- lib/libc/rpc/mt_misc.h | 1 - lib/libc/rpc/rpc_generic.c | 28 +++++++++++++++------------- lib/libc/rpc/rpc_soc.c | 15 +++++++++------ 8 files changed, 81 insertions(+), 65 deletions(-) (limited to 'lib') diff --git a/lib/libc/rpc/Symbol.map b/lib/libc/rpc/Symbol.map index ccf0bfa..4f356de 100644 --- a/lib/libc/rpc/Symbol.map +++ b/lib/libc/rpc/Symbol.map @@ -239,10 +239,6 @@ FBSDprivate_1.0 { __key_encryptsession_pk_LOCAL; __key_decryptsession_pk_LOCAL; __key_gendes_LOCAL; - __tsd_lock; /* - * Why does usr.bin/rpcinfo/Makefile need rpc_generic.c? - * Remove this hack if rpcinfo stops building with it. - */ __svc_clean_idle; __rpc_gss_unwrap; __rpc_gss_unwrap_stub; diff --git a/lib/libc/rpc/clnt_simple.c b/lib/libc/rpc/clnt_simple.c index 12b6679..ba21d2d 100644 --- a/lib/libc/rpc/clnt_simple.c +++ b/lib/libc/rpc/clnt_simple.c @@ -76,7 +76,11 @@ struct rpc_call_private { char nettype[NETIDLEN]; /* Network type */ }; static struct rpc_call_private *rpc_call_private_main; +static thread_key_t rpc_call_key; +static once_t rpc_call_once = ONCE_INITIALIZER; +static int rpc_call_key_error; +static void rpc_call_key_init(void); static void rpc_call_destroy(void *); static void @@ -91,6 +95,13 @@ rpc_call_destroy(void *vp) } } +static void +rpc_call_key_init(void) +{ + + rpc_call_key_error = thr_keycreate(&rpc_call_key, rpc_call_destroy); +} + /* * This is the simplified interface to the client rpc layer. 
* The client handle is not destroyed here and is reused for @@ -112,17 +123,16 @@ rpc_call(host, prognum, versnum, procnum, inproc, in, outproc, out, nettype) struct rpc_call_private *rcp = (struct rpc_call_private *) 0; enum clnt_stat clnt_stat; struct timeval timeout, tottimeout; - static thread_key_t rpc_call_key; int main_thread = 1; if ((main_thread = thr_main())) { rcp = rpc_call_private_main; } else { - if (rpc_call_key == 0) { - mutex_lock(&tsd_lock); - if (rpc_call_key == 0) - thr_keycreate(&rpc_call_key, rpc_call_destroy); - mutex_unlock(&tsd_lock); + if (thr_once(&rpc_call_once, rpc_call_key_init) != 0 || + rpc_call_key_error != 0) { + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = rpc_call_key_error; + return (rpc_createerr.cf_stat); } rcp = (struct rpc_call_private *)thr_getspecific(rpc_call_key); } diff --git a/lib/libc/rpc/getnetconfig.c b/lib/libc/rpc/getnetconfig.c index e5db51a..1310e36 100644 --- a/lib/libc/rpc/getnetconfig.c +++ b/lib/libc/rpc/getnetconfig.c @@ -130,21 +130,29 @@ static struct netconfig *dup_ncp(struct netconfig *); static FILE *nc_file; /* for netconfig db */ -static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER; +static mutex_t nc_file_lock = MUTEX_INITIALIZER; static struct netconfig_info ni = { 0, 0, NULL, NULL}; -static pthread_mutex_t ni_lock = PTHREAD_MUTEX_INITIALIZER; +static mutex_t ni_lock = MUTEX_INITIALIZER; +static thread_key_t nc_key; +static once_t nc_once = ONCE_INITIALIZER; +static int nc_key_error; + +static void +nc_key_init(void) +{ + + nc_key_error = thr_keycreate(&nc_key, free); +} #define MAXNETCONFIGLINE 1000 static int * __nc_error() { - static pthread_mutex_t nc_lock = PTHREAD_MUTEX_INITIALIZER; - static thread_key_t nc_key = 0; static int nc_error = 0; - int error, *nc_addr; + int *nc_addr; /* * Use the static `nc_error' if we are the main thread @@ -153,15 +161,8 @@ __nc_error() */ if (thr_main()) return (&nc_error); - if (nc_key == 0) { - error = 0; - mutex_lock(&nc_lock); - if (nc_key == 0) - error = thr_keycreate(&nc_key, free); - mutex_unlock(&nc_lock); - if (error) - return (&nc_error); - } + if (thr_once(&nc_once, nc_key_init) != 0 || nc_key_error != 0) + return (&nc_error); if ((nc_addr = (int *)thr_getspecific(nc_key)) == NULL) { nc_addr = (int *)malloc(sizeof (int)); if (thr_setspecific(nc_key, (void *) nc_addr) != 0) { diff --git a/lib/libc/rpc/key_call.c b/lib/libc/rpc/key_call.c index 6cc1139..afb11c8 100644 --- a/lib/libc/rpc/key_call.c +++ b/lib/libc/rpc/key_call.c @@ -279,6 +279,9 @@ struct key_call_private { uid_t uid; /* user-id at last authorization */ }; static struct key_call_private *key_call_private_main = NULL; +static thread_key_t key_call_key; +static once_t key_call_once = ONCE_INITIALIZER; +static int key_call_key_error; static void key_call_destroy(void *vp) @@ -292,6 +295,13 @@ key_call_destroy(void *vp) } } +static void +key_call_init(void) +{ + + key_call_key_error = thr_keycreate(&key_call_key, key_call_destroy); +} + /* * Keep the handle cached. This call may be made quite often. 
*/ @@ -307,7 +317,6 @@ int vers; struct utsname u; int main_thread; int fd; - static thread_key_t key_call_key; #define TOTAL_TIMEOUT 30 /* total timeout talking to keyserver */ #define TOTAL_TRIES 5 /* Number of tries */ @@ -315,12 +324,9 @@ int vers; if ((main_thread = thr_main())) { kcp = key_call_private_main; } else { - if (key_call_key == 0) { - mutex_lock(&tsd_lock); - if (key_call_key == 0) - thr_keycreate(&key_call_key, key_call_destroy); - mutex_unlock(&tsd_lock); - } + if (thr_once(&key_call_once, key_call_init) != 0 || + key_call_key_error != 0) + return ((CLIENT *) NULL); kcp = (struct key_call_private *)thr_getspecific(key_call_key); } if (kcp == (struct key_call_private *)NULL) { diff --git a/lib/libc/rpc/mt_misc.c b/lib/libc/rpc/mt_misc.c index 6241611..7abcaed 100644 --- a/lib/libc/rpc/mt_misc.c +++ b/lib/libc/rpc/mt_misc.c @@ -28,7 +28,6 @@ __FBSDID("$FreeBSD$"); #define proglst_lock __proglst_lock #define rpcsoc_lock __rpcsoc_lock #define svcraw_lock __svcraw_lock -#define tsd_lock __tsd_lock #define xprtlist_lock __xprtlist_lock /* protects the services list (svc.c) */ @@ -76,33 +75,33 @@ pthread_mutex_t rpcsoc_lock = PTHREAD_MUTEX_INITIALIZER; /* svc_raw.c serialization */ pthread_mutex_t svcraw_lock = PTHREAD_MUTEX_INITIALIZER; -/* protects TSD key creation */ -pthread_mutex_t tsd_lock = PTHREAD_MUTEX_INITIALIZER; - /* xprtlist (svc_generic.c) */ pthread_mutex_t xprtlist_lock = PTHREAD_MUTEX_INITIALIZER; #undef rpc_createerr struct rpc_createerr rpc_createerr; +static thread_key_t rce_key; +static once_t rce_once = ONCE_INITIALIZER; +static int rce_key_error; + +static void +rce_key_init(void) +{ + + rce_key_error = thr_keycreate(&rce_key, free); +} struct rpc_createerr * __rpc_createerr() { - static thread_key_t rce_key = 0; struct rpc_createerr *rce_addr = 0; if (thr_main()) return (&rpc_createerr); - if ((rce_addr = - (struct rpc_createerr *)thr_getspecific(rce_key)) != 0) { - mutex_lock(&tsd_lock); - if (thr_keycreate(&rce_key, free) != 0) { - mutex_unlock(&tsd_lock); - return (&rpc_createerr); - } - mutex_unlock(&tsd_lock); - } + if (thr_once(&rce_once, rce_key_init) != 0 || rce_key_error != 0) + return (&rpc_createerr); + rce_addr = (struct rpc_createerr *)thr_getspecific(rce_key); if (!rce_addr) { rce_addr = (struct rpc_createerr *) malloc(sizeof (struct rpc_createerr)); diff --git a/lib/libc/rpc/mt_misc.h b/lib/libc/rpc/mt_misc.h index e785c30..9cc2349 100644 --- a/lib/libc/rpc/mt_misc.h +++ b/lib/libc/rpc/mt_misc.h @@ -42,7 +42,6 @@ #define proglst_lock __proglst_lock #define rpcsoc_lock __rpcsoc_lock #define svcraw_lock __svcraw_lock -#define tsd_lock __tsd_lock #define xprtlist_lock __xprtlist_lock extern pthread_rwlock_t svc_lock; diff --git a/lib/libc/rpc/rpc_generic.c b/lib/libc/rpc/rpc_generic.c index 81bd92b..ab259d5 100644 --- a/lib/libc/rpc/rpc_generic.c +++ b/lib/libc/rpc/rpc_generic.c @@ -221,6 +221,18 @@ getnettype(nettype) return (_rpctypelist[i].type); } +static thread_key_t tcp_key, udp_key; +static once_t keys_once = ONCE_INITIALIZER; +static int tcp_key_error, udp_key_error; + +static void +keys_init(void) +{ + + tcp_key_error = thr_keycreate(&tcp_key, free); + udp_key_error = thr_keycreate(&udp_key, free); +} + /* * For the given nettype (tcp or udp only), return the first structure found. 
* This should be freed by calling freenetconfigent() @@ -236,25 +248,15 @@ __rpc_getconfip(nettype) static char *netid_udp_main; struct netconfig *dummy; int main_thread; - static thread_key_t tcp_key, udp_key; if ((main_thread = thr_main())) { netid_udp = netid_udp_main; netid_tcp = netid_tcp_main; } else { - if (tcp_key == 0) { - mutex_lock(&tsd_lock); - if (tcp_key == 0) - thr_keycreate(&tcp_key, free); - mutex_unlock(&tsd_lock); - } + if (thr_once(&keys_once, keys_init) != 0 || + tcp_key_error != 0 || udp_key_error != 0) + return (NULL); netid_tcp = (char *)thr_getspecific(tcp_key); - if (udp_key == 0) { - mutex_lock(&tsd_lock); - if (udp_key == 0) - thr_keycreate(&udp_key, free); - mutex_unlock(&tsd_lock); - } netid_udp = (char *)thr_getspecific(udp_key); } if (!netid_udp && !netid_tcp) { diff --git a/lib/libc/rpc/rpc_soc.c b/lib/libc/rpc/rpc_soc.c index 5922063..2568d43 100644 --- a/lib/libc/rpc/rpc_soc.c +++ b/lib/libc/rpc/rpc_soc.c @@ -360,6 +360,14 @@ registerrpc(prognum, versnum, procnum, progname, inproc, outproc) */ static thread_key_t clnt_broadcast_key; static resultproc_t clnt_broadcast_result_main; +static once_t clnt_broadcast_once = ONCE_INITIALIZER; + +static void +clnt_broadcast_key_init(void) +{ + + thr_keycreate(&clnt_broadcast_key, free); +} /* * Need to translate the netbuf address into sockaddr_in address. @@ -402,12 +410,7 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult) if (thr_main()) clnt_broadcast_result_main = eachresult; else { - if (clnt_broadcast_key == 0) { - mutex_lock(&tsd_lock); - if (clnt_broadcast_key == 0) - thr_keycreate(&clnt_broadcast_key, free); - mutex_unlock(&tsd_lock); - } + thr_once(&clnt_broadcast_once, clnt_broadcast_key_init); thr_setspecific(clnt_broadcast_key, (void *) eachresult); } return rpc_broadcast((rpcprog_t)prog, (rpcvers_t)vers, -- cgit v1.1 From dde080c859dfbc0b12fd706108b5debe53529c9f Mon Sep 17 00:00:00 2001 From: jilles Date: Tue, 23 Mar 2010 23:25:17 +0000 Subject: MFC r205398: Do not create *.gmon files for PIE executables on i386. Scrt1_c.o was accidentally compiled with -DGCRT (profiling), like gcrt1_c.o. This problem is i386-specific, the other architectures are OK. If you have problems with PIE executables such as samba and cups leaving behind gmon files, rebuild them after installing this change. PR: ports/143924 --- lib/csu/i386-elf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/csu/i386-elf/Makefile b/lib/csu/i386-elf/Makefile index c2af118..45c162f 100644 --- a/lib/csu/i386-elf/Makefile +++ b/lib/csu/i386-elf/Makefile @@ -24,7 +24,7 @@ crt1.o: crt1_c.o crt1_s.o objcopy --localize-symbol _start1 crt1.o Scrt1_c.o: crt1_c.c - ${CC} ${CFLAGS} -DGCRT -fPIC -DPIC -c -o Scrt1_c.o ${.CURDIR}/crt1_c.c + ${CC} ${CFLAGS} -fPIC -DPIC -c -o Scrt1_c.o ${.CURDIR}/crt1_c.c Scrt1.o: Scrt1_c.o crt1_s.o ${LD} ${LDFLAGS} -o Scrt1.o -r crt1_s.o Scrt1_c.o -- cgit v1.1 From db13dfe8cdd4ec67909d7149a2214030034b5349 Mon Sep 17 00:00:00 2001 From: dougb Date: Mon, 29 Mar 2010 06:31:58 +0000 Subject: Update to 9.6.2-P1, the latest patchfix release which deals with the problems related to the handling of broken DNSSEC trust chains. This fix is only relevant for those who have DNSSEC validation enabled and configure trust anchors from third parties, either manually, or through a system like DLV. 
--- lib/bind/config.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/bind/config.h b/lib/bind/config.h index 1d68450..3038b5b 100644 --- a/lib/bind/config.h +++ b/lib/bind/config.h @@ -166,6 +166,12 @@ int sigwait(const unsigned int *set, int *sig); /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 +/* Define to 1 if you have the `EVP_sha256' function. */ +#define HAVE_EVP_SHA256 1 + +/* Define to 1 if you have the `EVP_sha512' function. */ +#define HAVE_EVP_SHA512 1 + /* Define to 1 if you have the header file. */ #define HAVE_FCNTL_H 1 -- cgit v1.1 From eff331f048ccbce2e947b38fb71422c6faf0bc06 Mon Sep 17 00:00:00 2001 From: marius Date: Tue, 30 Mar 2010 18:58:07 +0000 Subject: MFC: r205393 - Remove a bogus forward declaration. - Fix whitespace. --- lib/libc/sparc64/fpu/fpu_extern.h | 7 +++---- lib/libc/sparc64/fpu/fpu_implode.c | 3 --- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/libc/sparc64/fpu/fpu_extern.h b/lib/libc/sparc64/fpu/fpu_extern.h index f22c463..0de30fc 100644 --- a/lib/libc/sparc64/fpu/fpu_extern.h +++ b/lib/libc/sparc64/fpu/fpu_extern.h @@ -41,7 +41,6 @@ #define _SPARC64_FPU_FPU_EXTERN_H_ struct utrapframe; -union instr; struct fpemu; struct fpn; @@ -61,9 +60,9 @@ struct fpn *__fpu_div(struct fpemu *); int __fpu_itof(struct fpn *, u_int); int __fpu_xtof(struct fpn *, u_int64_t); int __fpu_stof(struct fpn *, u_int); -int __fpu_dtof(struct fpn *, u_int, u_int ); -int __fpu_qtof(struct fpn *, u_int, u_int , u_int , u_int ); -void __fpu_explode(struct fpemu *, struct fpn *, int, int ); +int __fpu_dtof(struct fpn *, u_int, u_int); +int __fpu_qtof(struct fpn *, u_int, u_int, u_int, u_int); +void __fpu_explode(struct fpemu *, struct fpn *, int, int); /* fpu_implode.c */ u_int __fpu_ftoi(struct fpemu *, struct fpn *); diff --git a/lib/libc/sparc64/fpu/fpu_implode.c b/lib/libc/sparc64/fpu/fpu_implode.c index 5287d2b..32a0e7c 100644 --- a/lib/libc/sparc64/fpu/fpu_implode.c +++ b/lib/libc/sparc64/fpu/fpu_implode.c @@ -198,7 +198,6 @@ __fpu_ftoi(fe, fp) sign = fp->fp_sign; switch (fp->fp_class) { - case FPC_ZERO: return (0); @@ -248,7 +247,6 @@ __fpu_ftox(fe, fp, res) sign = fp->fp_sign; switch (fp->fp_class) { - case FPC_ZERO: res[1] = 0; return (0); @@ -504,7 +502,6 @@ __fpu_implode(fe, fp, type, space) { switch (type) { - case FTYPE_LNG: space[0] = __fpu_ftox(fe, fp, space); break; -- cgit v1.1 From ca721ce8830393b7024be0906c4c518d5090e2dc Mon Sep 17 00:00:00 2001 From: marius Date: Tue, 30 Mar 2010 19:03:26 +0000 Subject: MFC: r205394 Ensure that __fpu_ftox() both returns the high bits and res[1] contains the low bits also in the default case. 
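The convention the fix enforces, reduced to a simplified model (not the libc code): whichever path __fpu_ftox() takes, including the overflow default, the low word must land in res[1] and the high word must be the return value, so callers can reassemble the 64-bit result.

#include <stdint.h>

static uint32_t
ftox_result(int64_t i, uint32_t res[2])
{
        res[1] = (uint32_t)(i & 0xffffffff);    /* low 32 bits */
        return ((uint32_t)(i >> 32));           /* high 32 bits */
}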
PR: 144900
Submitted by: Peter Jeremy
Obtained from: OpenBSD
---
 lib/libc/sparc64/fpu/fpu_implode.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/sparc64/fpu/fpu_implode.c b/lib/libc/sparc64/fpu/fpu_implode.c
index 32a0e7c..91e40c1 100644
--- a/lib/libc/sparc64/fpu/fpu_implode.c
+++ b/lib/libc/sparc64/fpu/fpu_implode.c
@@ -248,8 +248,8 @@ __fpu_ftox(fe, fp, res)
 	sign = fp->fp_sign;
 	switch (fp->fp_class) {
 	case FPC_ZERO:
-		res[1] = 0;
-		return (0);
+		i = 0;
+		goto done;

 	case FPC_NUM:
 		/*
@@ -273,15 +273,17 @@
 			break;
 		if (sign)
 			i = -i;
-		res[1] = (int)i;
-		return (i >> 32);
+		goto done;

 	default:	/* Inf, qNaN, sNaN */
 		break;
 	}
 	/* overflow: replace any inexact exception with invalid */
 	fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
-	return (0x7fffffffffffffffLL + sign);
+	i = 0x7fffffffffffffffLL + sign;
+done:
+	res[1] = i & 0xffffffff;
+	return (i >> 32);
 }

 /*
--
cgit v1.1


From 09058e4e259d9d79ef54eff5d335d02ce2c18668 Mon Sep 17 00:00:00 2001
From: marius
Date: Tue, 30 Mar 2010 19:05:08 +0000
Subject: MFC: r205395

FPU_DEBUG requires <stdio.h>.

PR: 144900
Submitted by: Peter Jeremy
---
 lib/libc/sparc64/fpu/fpu.c         | 5 ++++-
 lib/libc/sparc64/fpu/fpu_explode.c | 4 ++++
 lib/libc/sparc64/fpu/fpu_implode.c | 4 ++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libc/sparc64/fpu/fpu.c b/lib/libc/sparc64/fpu/fpu.c
index 7c8a3a9..4e92788 100644
--- a/lib/libc/sparc64/fpu/fpu.c
+++ b/lib/libc/sparc64/fpu/fpu.c
@@ -69,9 +69,12 @@ __FBSDID("$FreeBSD$");

 #include "namespace.h"
 #include
-#include <stdio.h>
 #include
+#ifdef FPU_DEBUG
+#include <stdio.h>
+#endif
 #include
+#include
 #include "un-namespace.h"
 #include "libc_private.h"
diff --git a/lib/libc/sparc64/fpu/fpu_explode.c b/lib/libc/sparc64/fpu/fpu_explode.c
index 09cfd5a..12c8802 100644
--- a/lib/libc/sparc64/fpu/fpu_explode.c
+++ b/lib/libc/sparc64/fpu/fpu_explode.c
@@ -49,6 +49,10 @@ __FBSDID("$FreeBSD$");

 #include

+#ifdef FPU_DEBUG
+#include <stdio.h>
+#endif
+
 #include
 #include
 #include
diff --git a/lib/libc/sparc64/fpu/fpu_implode.c b/lib/libc/sparc64/fpu/fpu_implode.c
index 91e40c1..82a8460 100644
--- a/lib/libc/sparc64/fpu/fpu_implode.c
+++ b/lib/libc/sparc64/fpu/fpu_implode.c
@@ -49,6 +49,10 @@ __FBSDID("$FreeBSD$");

 #include

+#ifdef FPU_DEBUG
+#include <stdio.h>
+#endif
+
 #include
 #include
 #include
--
cgit v1.1


From 5c0d723c8f041700174535e11644e42e7b2cd5eb Mon Sep 17 00:00:00 2001
From: marius
Date: Tue, 30 Mar 2010 19:06:31 +0000
Subject: MFC: r205396

Division should take both arguments' signs into account when the
dividend is infinity or zero and the divisor is not the same.

PR: 144900
Submitted by: Peter Jeremy
---
 lib/libc/sparc64/fpu/fpu_div.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/sparc64/fpu/fpu_div.c b/lib/libc/sparc64/fpu/fpu_div.c
index c4b8ef6..4748daf 100644
--- a/lib/libc/sparc64/fpu/fpu_div.c
+++ b/lib/libc/sparc64/fpu/fpu_div.c
@@ -167,14 +167,16 @@ __fpu_div(fe)
 	 * return it.
Otherwise we have the following cases: * * Inf / Inf = NaN, plus NV exception - * Inf / num = Inf [i.e., return x] - * Inf / 0 = Inf [i.e., return x] - * 0 / Inf = 0 [i.e., return x] - * 0 / num = 0 [i.e., return x] + * Inf / num = Inf [i.e., return x #] + * Inf / 0 = Inf [i.e., return x #] + * 0 / Inf = 0 [i.e., return x #] + * 0 / num = 0 [i.e., return x #] * 0 / 0 = NaN, plus NV exception - * num / Inf = 0 + * num / Inf = 0 # * num / num = num (do the divide) - * num / 0 = Inf, plus DZ exception + * num / 0 = Inf #, plus DZ exception + * + * # Sign of result is XOR of operand signs. */ if (ISNAN(x) || ISNAN(y)) { ORDER(x, y); @@ -183,10 +185,10 @@ __fpu_div(fe) if (ISINF(x) || ISZERO(x)) { if (x->fp_class == y->fp_class) return (__fpu_newnan(fe)); + x->fp_sign ^= y->fp_sign; return (x); } - /* all results at this point use XOR of operand signs */ x->fp_sign ^= y->fp_sign; if (ISINF(y)) { x->fp_class = FPC_ZERO; -- cgit v1.1 From a9048188435e001c00b98ed0536b49501140bd77 Mon Sep 17 00:00:00 2001 From: marius Date: Tue, 30 Mar 2010 19:08:02 +0000 Subject: MFC: r205397 - While SPARC V9 allows tininess to be detected either before or after rounding (impl. dep. #55), the SPARC JPS1 responsible for SPARC64 and UltraSPARC processors defines that in all cases tinyness is detected before rounding, therefore rounding up to the smallest normalised number should set the underflow flag. - If an infinite result is rounded down, the result should have an exponent 1 less than the value for infinity. PR: 144900 Submitted by: Peter Jeremy --- lib/libc/sparc64/fpu/fpu_implode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/libc/sparc64/fpu/fpu_implode.c b/lib/libc/sparc64/fpu/fpu_implode.c index 82a8460..2a2a9d0 100644 --- a/lib/libc/sparc64/fpu/fpu_implode.c +++ b/lib/libc/sparc64/fpu/fpu_implode.c @@ -329,8 +329,9 @@ __fpu_ftos(fe, fp) * right to introduce leading zeroes. Rounding then acts * differently for normals and subnormals: the largest subnormal * may round to the smallest normal (1.0 x 2^minexp), or may - * remain subnormal. In the latter case, signal an underflow - * if the result was inexact or if underflow traps are enabled. + * remain subnormal. A number that is subnormal before rounding + * will signal an underflow if the result is inexact or if underflow + * traps are enabled. 
* * Rounding a normal, on the other hand, always produces another * normal (although either way the result might be too big for @@ -345,8 +346,10 @@ __fpu_ftos(fe, fp) if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ (void) __fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); - if (fpround(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) + if (fpround(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) { + fe->fe_cx |= FSR_UF; return (sign | SNG_EXP(1) | 0); + } if ((fe->fe_cx & FSR_NX) || (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT))) fe->fe_cx |= FSR_UF; @@ -407,6 +410,7 @@ zero: res[1] = 0; if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { (void) __fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); if (fpround(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { + fe->fe_cx |= FSR_UF; res[1] = 0; return (sign | DBL_EXP(1) | 0); } @@ -426,7 +430,7 @@ zero: res[1] = 0; return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); } res[1] = ~0; - return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); + return (sign | DBL_EXP(DBL_EXP_INFNAN - 1) | DBL_MASK); } done: res[1] = fp->fp_mant[3]; @@ -468,6 +472,7 @@ zero: res[1] = res[2] = res[3] = 0; if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) { (void) __fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp); if (fpround(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) { + fe->fe_cx |= FSR_UF; res[1] = res[2] = res[3] = 0; return (sign | EXT_EXP(1) | 0); } @@ -487,7 +492,7 @@ zero: res[1] = res[2] = res[3] = 0; return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0); } res[1] = res[2] = res[3] = ~0; - return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK); + return (sign | EXT_EXP(EXT_EXP_INFNAN - 1) | EXT_MASK); } done: res[1] = fp->fp_mant[1]; -- cgit v1.1 From c54f8e795a1ebd5c6fb6729a0d1e70819cc139ba Mon Sep 17 00:00:00 2001 From: marius Date: Tue, 30 Mar 2010 19:13:37 +0000 Subject: MFC: r205410 Avoid aliasing which leads to incorrect results when compiling with the default strict aliasing rules. PR: 144900 Submitted by: Peter Jeremy --- lib/libc/sparc64/fpu/fpu_explode.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/libc/sparc64/fpu/fpu_explode.c b/lib/libc/sparc64/fpu/fpu_explode.c index 12c8802..474e0df 100644 --- a/lib/libc/sparc64/fpu/fpu_explode.c +++ b/lib/libc/sparc64/fpu/fpu_explode.c @@ -139,9 +139,9 @@ __fpu_xtof(fp, i) * a signed or unsigned entity. 
	 */
 	if (fp->fp_sign && (int64_t)i < 0)
-		*((int64_t*)fp->fp_mant) = -i;
+		*((int64_t *)fp->fp_mant) = -i;
 	else
-		*((int64_t*)fp->fp_mant) = i;
+		*((int64_t *)fp->fp_mant) = i;
 	fp->fp_mant[2] = 0;
 	fp->fp_mant[3] = 0;
 	__fpu_norm(fp);
@@ -262,14 +262,12 @@ __fpu_explode(fe, fp, type, reg)
 	struct fpn *fp;
 	int type, reg;
 {
-	u_int32_t s, *sp;
-	u_int64_t l[2];
-	void *vl = l;
+	u_int64_t l0, l1;
+	u_int32_t s;

 	if (type == FTYPE_LNG || type == FTYPE_DBL || type == FTYPE_EXT) {
-		l[0] = __fpu_getreg64(reg & ~1);
-		sp = vl;
-		fp->fp_sign = sp[0] >> 31;
+		l0 = __fpu_getreg64(reg & ~1);
+		fp->fp_sign = l0 >> 63;
 	} else {
 		s = __fpu_getreg(reg);
 		fp->fp_sign = s >> 31;
@@ -277,7 +275,7 @@
 	fp->fp_sticky = 0;
 	switch (type) {
 	case FTYPE_LNG:
-		s = __fpu_xtof(fp, l[0]);
+		s = __fpu_xtof(fp, l0);
 		break;

 	case FTYPE_INT:
@@ -289,12 +287,13 @@
 		break;

 	case FTYPE_DBL:
-		s = __fpu_dtof(fp, sp[0], sp[1]);
+		s = __fpu_dtof(fp, l0 >> 32, l0 & 0xffffffff);
 		break;

 	case FTYPE_EXT:
-		l[1] = __fpu_getreg64((reg & ~1) + 2);
-		s = __fpu_qtof(fp, sp[0], sp[1], sp[2], sp[3]);
+		l1 = __fpu_getreg64((reg & ~1) + 2);
+		s = __fpu_qtof(fp, l0 >> 32, l0 & 0xffffffff, l1 >> 32,
+		    l1 & 0xffffffff);
 		break;

 	default:
--
cgit v1.1


From 5c3b044c3171a07b537efc08d6c9c5bab7c12e62 Mon Sep 17 00:00:00 2001
From: gahr
Date: Wed, 31 Mar 2010 13:51:31 +0000
Subject: MFC r205606

Remove const'ness from dlerror(3) prototype, for consistency with POSIX.

Approved by: cognet
---
 lib/libc/gen/dlfcn.c  | 4 ++--
 lib/libc/gen/dlopen.3 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/gen/dlfcn.c b/lib/libc/gen/dlfcn.c
index 4be8847..a1ca29d 100644
--- a/lib/libc/gen/dlfcn.c
+++ b/lib/libc/gen/dlfcn.c
@@ -34,7 +34,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include

-static const char sorry[] = "Service unavailable";
+static char sorry[] = "Service unavailable";

 /*
  * For ELF, the dynamic linker directly resolves references to its
@@ -69,7 +69,7 @@ dlclose(void *handle)
 }

 #pragma weak dlerror
-const char *
+char *
 dlerror(void)
 {
 	return sorry;
diff --git a/lib/libc/gen/dlopen.3 b/lib/libc/gen/dlopen.3
index 6488bef..d4b111a 100644
--- a/lib/libc/gen/dlopen.3
+++ b/lib/libc/gen/dlopen.3
@@ -52,7 +52,7 @@
 .Fn dlsym "void * restrict handle" "const char * restrict symbol"
 .Ft dlfunc_t
 .Fn dlfunc "void * restrict handle" "const char * restrict symbol"
-.Ft const char *
+.Ft char *
 .Fn dlerror "void"
 .Ft int
 .Fn dlclose "void *handle"
--
cgit v1.1


From e57fc48ad00a5d6274166cf7517775ba1d2301bd Mon Sep 17 00:00:00 2001
From: thompsa
Date: Wed, 7 Apr 2010 00:34:05 +0000
Subject: MFC r203147

Add a function to check if the usb device is still connected.
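Typical use of the new call, per the man page text added below (a sketch with a hypothetical helper, not library code): it returns zero while the opened device is still present and a LIBUSB20_ERROR value once it is gone.

#include <libusb20.h>
#include <stdio.h>

/* "pdev" is assumed to be a device already opened with
 * libusb20_dev_open(). */
static int
still_there(struct libusb20_device *pdev)
{
        if (libusb20_dev_check_connected(pdev) == 0)
                return (1);             /* still connected */
        printf("device detached\n");
        return (0);
}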
Submitted by: Hans Petter Selasky --- lib/libusb/libusb20.3 | 11 +++++++++++ lib/libusb/libusb20.c | 10 ++++++++++ lib/libusb/libusb20.h | 1 + lib/libusb/libusb20_int.h | 2 ++ lib/libusb/libusb20_ugen20.c | 20 ++++++++++++++++++++ 5 files changed, 44 insertions(+) (limited to 'lib') diff --git a/lib/libusb/libusb20.3 b/lib/libusb/libusb20.3 index 5404013..7896e8f0 100644 --- a/lib/libusb/libusb20.3 +++ b/lib/libusb/libusb20.3 @@ -143,6 +143,8 @@ USB access library (libusb -lusb) .Ft int .Fn libusb20_dev_reset "struct libusb20_device *pdev" .Ft int +.Fn libusb20_dev_check_connected "struct libusb20_device *pdev" +.Ft int .Fn libusb20_dev_set_power_mode "struct libusb20_device *pdev" "uint8_t power_mode" .Ft uint8_t .Fn libusb20_dev_get_power_mode "struct libusb20_device *pdev" @@ -677,6 +679,15 @@ the last set USB configuration. This function returns zero on success else a LIBUSB20_ERROR value is returned. . +. +.Pp +. +.Fn libusb20_dev_check_connected +will check if an opened USB device is still connected. +. +This function returns zero if the device is still connected else a LIBUSB20_ERROR value is returned. +. +. .Pp . .Fn libusb20_dev_set_power_mode diff --git a/lib/libusb/libusb20.c b/lib/libusb/libusb20.c index 1c75fc1..e181b49 100644 --- a/lib/libusb/libusb20.c +++ b/lib/libusb/libusb20.c @@ -67,6 +67,7 @@ dummy_callback(struct libusb20_transfer *xfer) #define dummy_set_config_index (void *)dummy_int #define dummy_set_alt_index (void *)dummy_int #define dummy_reset_device (void *)dummy_int +#define dummy_check_connected (void *)dummy_int #define dummy_set_power_mode (void *)dummy_int #define dummy_get_power_mode (void *)dummy_int #define dummy_kernel_driver_active (void *)dummy_int @@ -673,6 +674,15 @@ libusb20_dev_reset(struct libusb20_device *pdev) } int +libusb20_dev_check_connected(struct libusb20_device *pdev) +{ + int error; + + error = pdev->methods->check_connected(pdev); + return (error); +} + +int libusb20_dev_set_power_mode(struct libusb20_device *pdev, uint8_t power_mode) { int error; diff --git a/lib/libusb/libusb20.h b/lib/libusb/libusb20.h index bb7d8a4..6b253aa 100644 --- a/lib/libusb/libusb20.h +++ b/lib/libusb/libusb20.h @@ -250,6 +250,7 @@ int libusb20_dev_request_sync(struct libusb20_device *pdev, struct LIBUSB20_CONT int libusb20_dev_req_string_sync(struct libusb20_device *pdev, uint8_t index, uint16_t langid, void *ptr, uint16_t len); int libusb20_dev_req_string_simple_sync(struct libusb20_device *pdev, uint8_t index, void *ptr, uint16_t len); int libusb20_dev_reset(struct libusb20_device *pdev); +int libusb20_dev_check_connected(struct libusb20_device *pdev); int libusb20_dev_set_power_mode(struct libusb20_device *pdev, uint8_t power_mode); uint8_t libusb20_dev_get_power_mode(struct libusb20_device *pdev); int libusb20_dev_set_alt_index(struct libusb20_device *pdev, uint8_t iface_index, uint8_t alt_index); diff --git a/lib/libusb/libusb20_int.h b/lib/libusb/libusb20_int.h index 004b0cf..b0e0e2d 100644 --- a/lib/libusb/libusb20_int.h +++ b/lib/libusb/libusb20_int.h @@ -101,6 +101,7 @@ typedef int (libusb20_set_power_mode_t)(struct libusb20_device *pdev, uint8_t po typedef int (libusb20_get_power_mode_t)(struct libusb20_device *pdev, uint8_t *power_mode); typedef int (libusb20_set_alt_index_t)(struct libusb20_device *pdev, uint8_t iface_index, uint8_t alt_index); typedef int (libusb20_set_config_index_t)(struct libusb20_device *pdev, uint8_t index); +typedef int (libusb20_check_connected_t)(struct libusb20_device *pdev); /* USB transfer specific */ typedef int 
(libusb20_tr_open_t)(struct libusb20_transfer *xfer, uint32_t MaxBufSize, uint32_t MaxFrameCount, uint8_t ep_no); @@ -117,6 +118,7 @@ typedef void (libusb20_tr_cancel_async_t)(struct libusb20_transfer *xfer); m(n, kernel_driver_active) \ m(n, process) \ m(n, reset_device) \ + m(n, check_connected) \ m(n, set_power_mode) \ m(n, get_power_mode) \ m(n, set_alt_index) \ diff --git a/lib/libusb/libusb20_ugen20.c b/lib/libusb/libusb20_ugen20.c index efcca63..892ba0e 100644 --- a/lib/libusb/libusb20_ugen20.c +++ b/lib/libusb/libusb20_ugen20.c @@ -67,6 +67,7 @@ static libusb20_get_config_index_t ugen20_get_config_index; static libusb20_set_config_index_t ugen20_set_config_index; static libusb20_set_alt_index_t ugen20_set_alt_index; static libusb20_reset_device_t ugen20_reset_device; +static libusb20_check_connected_t ugen20_check_connected; static libusb20_set_power_mode_t ugen20_set_power_mode; static libusb20_get_power_mode_t ugen20_get_power_mode; static libusb20_kernel_driver_active_t ugen20_kernel_driver_active; @@ -553,6 +554,25 @@ ugen20_reset_device(struct libusb20_device *pdev) } static int +ugen20_check_connected(struct libusb20_device *pdev) +{ + uint32_t plugtime; + int error = 0; + + if (ioctl(pdev->file_ctrl, USB_GET_PLUGTIME, &plugtime)) { + error = LIBUSB20_ERROR_NO_DEVICE; + goto done; + } + + if (pdev->session_data.plugtime != plugtime) { + error = LIBUSB20_ERROR_NO_DEVICE; + goto done; + } +done: + return (error); +} + +static int ugen20_set_power_mode(struct libusb20_device *pdev, uint8_t power_mode) { int temp; -- cgit v1.1 From 522064f59658ae18f0477f0981fbc7ca1f8e06a3 Mon Sep 17 00:00:00 2001 From: avg Date: Thu, 8 Apr 2010 07:43:15 +0000 Subject: MFC r206177: hash.3: fix a factual mistake in the man page --- lib/libc/db/man/hash.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/db/man/hash.3 b/lib/libc/db/man/hash.3 index 6724042..133885d 100644 --- a/lib/libc/db/man/hash.3 +++ b/lib/libc/db/man/hash.3 @@ -78,7 +78,7 @@ The element defines the .Nm -table bucket size, and is, by default, 256 bytes. +table bucket size, and is, by default, 4096 bytes. It may be preferable to increase the page size for disk-resident tables and tables with large data items. 
.It Va ffactor
--
cgit v1.1


From 467d5479951a967aa4ed31d07540c64b7f2500d8 Mon Sep 17 00:00:00 2001
From: avg
Date: Mon, 12 Apr 2010 21:12:03 +0000
Subject: MFC r206178: libc/db/hash: cap auto-tuned block size

---
 lib/libc/db/hash/hash.c | 2 ++
 lib/libc/db/hash/hash.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c
index 83d2657..638814c 100644
--- a/lib/libc/db/hash/hash.c
+++ b/lib/libc/db/hash/hash.c
@@ -293,6 +293,8 @@ init_hash(HTAB *hashp, const char *file, const HASHINFO *info)
 		if (stat(file, &statbuf))
 			return (NULL);
 		hashp->BSIZE = statbuf.st_blksize;
+		if (hashp->BSIZE > MAX_BSIZE)
+			hashp->BSIZE = MAX_BSIZE;
 		hashp->BSHIFT = __log2(hashp->BSIZE);
 	}
 
diff --git a/lib/libc/db/hash/hash.h b/lib/libc/db/hash/hash.h
index 8329413..cd11a3a 100644
--- a/lib/libc/db/hash/hash.h
+++ b/lib/libc/db/hash/hash.h
@@ -118,7 +118,7 @@ typedef struct htab {		 /* Memory resident data structure */
 /*
  * Constants
  */
-#define MAX_BSIZE		65536		/* 2^16 */
+#define MAX_BSIZE		32768		/* 2^15 but should be 65536 */
 #define MIN_BUFFERS		6
 #define MINHDRSIZE		512
 #define DEF_BUFSIZE		65536		/* 64 K */
--
cgit v1.1


From c79cf0b1c9291cf43f8fbdbe73cd46a4c0579063 Mon Sep 17 00:00:00 2001
From: trasz
Date: Tue, 13 Apr 2010 06:01:24 +0000
Subject: MFC r205796: Make acl_to_text_np(3) not crash on long group or user names in NFSv4 ACLs.

PR:		amd64/145091
---
 lib/libc/posix1e/acl_to_text_nfs4.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/posix1e/acl_to_text_nfs4.c b/lib/libc/posix1e/acl_to_text_nfs4.c
index 3adbfb4..5d0aa31 100644
--- a/lib/libc/posix1e/acl_to_text_nfs4.c
+++ b/lib/libc/posix1e/acl_to_text_nfs4.c
@@ -167,7 +167,7 @@ format_additional_id(char *str, size_t size, const acl_entry_t entry)
 static int
 format_entry(char *str, size_t size, const acl_entry_t entry, int flags)
 {
-	size_t off = 0, padding_length, maximum_who_field_length = 18;
+	size_t off = 0, min_who_field_length = 18;
 	acl_permset_t permset;
 	acl_flagset_t flagset;
 	int error, len;
@@ -188,12 +188,9 @@ format_entry(char *str, size_t size, const acl_entry_t entry, int flags)
 	if (error)
 		return (error);
 	len = strlen(buf);
-	padding_length = maximum_who_field_length - len;
-	if (padding_length > 0) {
-		memset(str, ' ', padding_length);
-		off += padding_length;
-	}
-	off += snprintf(str + off, size - off, "%s:", buf);
+	if (len < min_who_field_length)
+		len = min_who_field_length;
+	off += snprintf(str + off, size - off, "%*s:", len, buf);
 
 	error = _nfs4_format_access_mask(buf, sizeof(buf), *permset,
 	    flags & ACL_TEXT_VERBOSE);
--
cgit v1.1


From 89a00e020b2c8a517462b74b3c52eaf1d11f7d6f Mon Sep 17 00:00:00 2001
From: jhb
Date: Wed, 14 Apr 2010 15:22:58 +0000
Subject: MFC 205536: Reject attempts to create a MAP_ANON mapping with a non-zero offset.

---
 lib/libc/sys/mmap.2 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libc/sys/mmap.2 b/lib/libc/sys/mmap.2
index e632748..85019b2 100644
--- a/lib/libc/sys/mmap.2
+++ b/lib/libc/sys/mmap.2
@@ -105,7 +105,7 @@ The file descriptor used for creating
 must be \-1.
 The
 .Fa offset
-argument is ignored.
+argument must be 0.
 .\".It Dv MAP_FILE
 .\"Mapped from a regular file or character-special device memory.
 .It Dv MAP_FIXED
@@ -312,6 +312,11 @@ was equal to zero.
 was specified and the
 .Fa fd
 argument was not -1.
+.It Bq Er EINVAL
+.Dv MAP_ANON
+was specified and the
+.Fa offset
+argument was not 0.
.It Bq Er ENODEV
.Dv MAP_ANON
has not been specified and
--
cgit v1.1


From 9323e2ba3f24089d879edac55fbd2d36dde42947 Mon Sep 17 00:00:00 2001
From: jhb
Date: Wed, 14 Apr 2010 17:17:06 +0000
Subject: MFC 205900:

Use panic() (which the environment is required to provide to libstand) to
implement assert() instead of relying on a non-required exit().  The exit()
invocation also did not match the semantics of the exit() routine that
current boot environments happen to require.

---
 lib/libstand/assert.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/libstand/assert.c b/lib/libstand/assert.c
index b424358..8eec63a 100644
--- a/lib/libstand/assert.c
+++ b/lib/libstand/assert.c
@@ -35,10 +35,10 @@ void
 __assert(const char *func, const char *file, int line, const char *expression)
 {
 	if (func == NULL)
-		printf("Assertion failed: (%s), file %s, line %d.\n",
+		panic("Assertion failed: (%s), file %s, line %d.\n",
 		    expression, file, line);
 	else
-		printf("Assertion failed: (%s), function %s, file %s, line "
-		    "%d.\n", expression, func, file, line);
-	exit();
+		panic(
+		    "Assertion failed: (%s), function %s, file %s, line %d.\n",
+		    expression, func, file, line);
 }
--
cgit v1.1


From 3a63a51857c2a0f86bb2d3b2638ec357a8500db8 Mon Sep 17 00:00:00 2001
From: fabient
Date: Fri, 16 Apr 2010 15:45:09 +0000
Subject: MFC r206089, r206684:

- Support for uncore counting events: one fixed PMC with the uncore
  domain clock, 8 programmable PMCs.
- Westmere-based CPU (Xeon 5600, Core i7 980X) support.
- New man pages with event lists for core and uncore.
- Updated Core i7 events with the Intel 253669-033US December 2009 doc.
  Some events were removed from the documentation; they have been kept
  in the code but are documented in the man page as obsolete.
- Offcore response events can be set up with the rsp token.
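As an illustration of the last point (a sketch, not part of the commit: it
assumes the event and mask spellings listed in pmc.corei7(3) and the usual
libpmc convention of joining mask values with '+'):

	#include <pmc.h>
	#include <err.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		pmc_id_t pmcid;
		pmc_value_t count;

		if (pmc_init() < 0)
			err(1, "pmc_init");
		/* Demand data reads served from local DRAM, counted
		 * system-wide on CPU 0. */
		if (pmc_allocate(
		    "OFF_CORE_RESPONSE_0,rsp=DMND_DATA_RD+LOCAL_DRAM",
		    PMC_MODE_SC, 0, 0, &pmcid) < 0)
			err(1, "pmc_allocate");
		if (pmc_start(pmcid) < 0)
			err(1, "pmc_start");
		sleep(5);
		if (pmc_read(pmcid, &count) < 0)
			err(1, "pmc_read");
		printf("%ju offcore responses\n", (uintmax_t)count);
		return (0);
	}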
Sponsored by: NETASQ --- lib/libpmc/Makefile | 5 + lib/libpmc/libpmc.c | 181 ++++- lib/libpmc/pmc.corei7.3 | 1581 +++++++++++++++++++++++++++++++++++++++++++ lib/libpmc/pmc.corei7uc.3 | 880 ++++++++++++++++++++++++ lib/libpmc/pmc.ucf.3 | 115 ++++ lib/libpmc/pmc.westmere.3 | 1329 ++++++++++++++++++++++++++++++++++++ lib/libpmc/pmc.westmereuc.3 | 1083 +++++++++++++++++++++++++++++ 7 files changed, 5167 insertions(+), 7 deletions(-) create mode 100644 lib/libpmc/pmc.corei7.3 create mode 100644 lib/libpmc/pmc.corei7uc.3 create mode 100644 lib/libpmc/pmc.ucf.3 create mode 100644 lib/libpmc/pmc.westmere.3 create mode 100644 lib/libpmc/pmc.westmereuc.3 (limited to 'lib') diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile index 7d97c96..3ad1c4d 100644 --- a/lib/libpmc/Makefile +++ b/lib/libpmc/Makefile @@ -28,11 +28,16 @@ MAN+= pmc.atom.3 MAN+= pmc.core.3 MAN+= pmc.core2.3 MAN+= pmc.iaf.3 +MAN+= pmc.ucf.3 MAN+= pmc.k7.3 MAN+= pmc.k8.3 MAN+= pmc.p4.3 MAN+= pmc.p5.3 MAN+= pmc.p6.3 +MAN+= pmc.corei7.3 +MAN+= pmc.corei7uc.3 +MAN+= pmc.westmere.3 +MAN+= pmc.westmereuc.3 MAN+= pmc.tsc.3 MLINKS+= \ diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index af716da..929ca73 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -54,6 +54,10 @@ static int iaf_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); static int iap_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); +static int ucf_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +static int ucp_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec, @@ -132,6 +136,7 @@ PMC_CLASSDEP_TABLE(k8, K8); PMC_CLASSDEP_TABLE(p4, P4); PMC_CLASSDEP_TABLE(p5, P5); PMC_CLASSDEP_TABLE(p6, P6); +PMC_CLASSDEP_TABLE(ucf, UCF); #undef __PMC_EV_ALIAS #define __PMC_EV_ALIAS(N,CODE) { N, PMC_EV_##CODE }, @@ -157,6 +162,21 @@ static const struct pmc_event_descr corei7_event_table[] = __PMC_EV_ALIAS_COREI7() }; +static const struct pmc_event_descr westmere_event_table[] = +{ + __PMC_EV_ALIAS_WESTMERE() +}; + +static const struct pmc_event_descr corei7uc_event_table[] = +{ + __PMC_EV_ALIAS_COREI7UC() +}; + +static const struct pmc_event_descr westmereuc_event_table[] = +{ + __PMC_EV_ALIAS_WESTMEREUC() +}; + /* * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...) 
* @@ -170,7 +190,8 @@ static const struct pmc_event_descr corei7_event_table[] = PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC); PMC_MDEP_TABLE(core, IAP, PMC_CLASS_TSC); PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC); -PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC); +PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP); +PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP); PMC_MDEP_TABLE(k7, K7, PMC_CLASS_TSC); PMC_MDEP_TABLE(k8, K8, PMC_CLASS_TSC); PMC_MDEP_TABLE(p4, P4, PMC_CLASS_TSC); @@ -201,6 +222,10 @@ PMC_CLASS_TABLE_DESC(atom, IAP, atom, iap); PMC_CLASS_TABLE_DESC(core, IAP, core, iap); PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap); PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap); +PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap); +PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf); +PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp); +PMC_CLASS_TABLE_DESC(westmereuc, UCP, westmereuc, ucp); #endif #if defined(__i386__) PMC_CLASS_TABLE_DESC(k7, K7, k7, k7); @@ -281,7 +306,7 @@ struct pmc_masks { const uint32_t pm_value; }; #define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) } -#define NULLMASK PMCMASK(NULL,0) +#define NULLMASK { .pm_name = NULL } #if defined(__amd64__) || defined(__i386__) static int @@ -474,6 +499,8 @@ static struct pmc_event_alias core2_aliases_without_iaf[] = { #define atom_aliases_without_iaf core2_aliases_without_iaf #define corei7_aliases core2_aliases #define corei7_aliases_without_iaf core2_aliases_without_iaf +#define westmere_aliases core2_aliases +#define westmere_aliases_without_iaf core2_aliases_without_iaf #define IAF_KW_OS "os" #define IAF_KW_USR "usr" @@ -524,6 +551,7 @@ iaf_allocate_pmc(enum pmc_event pe, char *ctrspec, #define IAP_KW_SNOOPTYPE "snooptype" #define IAP_KW_TRANSITION "trans" #define IAP_KW_USR "usr" +#define IAP_KW_RSP "rsp" static struct pmc_masks iap_core_mask[] = { PMCMASK(all, (0x3 << 14)), @@ -571,19 +599,38 @@ static struct pmc_masks iap_transition_mask[] = { NULLMASK }; +static struct pmc_masks iap_rsp_mask[] = { + PMCMASK(DMND_DATA_RD, (1 << 0)), + PMCMASK(DMND_RFO, (1 << 1)), + PMCMASK(DMND_IFETCH, (1 << 2)), + PMCMASK(WB, (1 << 3)), + PMCMASK(PF_DATA_RD, (1 << 4)), + PMCMASK(PF_RFO, (1 << 5)), + PMCMASK(PF_IFETCH, (1 << 6)), + PMCMASK(OTHER, (1 << 7)), + PMCMASK(UNCORE_HIT, (1 << 8)), + PMCMASK(OTHER_CORE_HIT_SNP, (1 << 9)), + PMCMASK(OTHER_CORE_HITM, (1 << 10)), + PMCMASK(REMOTE_CACHE_FWD, (1 << 12)), + PMCMASK(REMOTE_DRAM, (1 << 13)), + PMCMASK(LOCAL_DRAM, (1 << 14)), + PMCMASK(NON_DRAM, (1 << 15)), + NULLMASK +}; + static int iap_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; - uint32_t cachestate, evmask; + uint32_t cachestate, evmask, rsp; int count, n; pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_QUALIFIER); pmc_config->pm_md.pm_iap.pm_iap_config = 0; - cachestate = evmask = 0; + cachestate = evmask = rsp = 0; /* Parse additional modifiers if present */ while ((p = strsep(&ctrspec, ",")) != NULL) { @@ -630,8 +677,7 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec, return (-1); } else if (cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM || cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2 || - cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME || - cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7) { + cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME) { if (KWPREFIXMATCH(p, IAP_KW_SNOOPRESPONSE "=")) { n = pmc_parse_mask(iap_snoopresponse_mask, p, &evmask); @@ -640,6 
+686,12 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec, &evmask); } else return (-1); + } else if (cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7 || + cpu_info.pm_cputype == PMC_CPU_INTEL_WESTMERE) { + if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) { + n = pmc_parse_mask(iap_rsp_mask, p, &rsp); + } else + return (-1); } else return (-1); @@ -672,6 +724,69 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec, } pmc_config->pm_md.pm_iap.pm_iap_config |= cachestate; + pmc_config->pm_md.pm_iap.pm_iap_rsp = rsp; + + return (0); +} + +/* + * Intel Uncore. + */ + +static int +ucf_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + (void) pe; + (void) ctrspec; + + pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); + pmc_config->pm_md.pm_ucf.pm_ucf_flags = 0; + + return (0); +} + +#define UCP_KW_CMASK "cmask" +#define UCP_KW_EDGE "edge" +#define UCP_KW_INV "inv" + +static int +ucp_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + char *e, *p, *q; + int count, n; + + (void) pe; + + pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE | + PMC_CAP_QUALIFIER); + pmc_config->pm_md.pm_ucp.pm_ucp_config = 0; + + /* Parse additional modifiers if present */ + while ((p = strsep(&ctrspec, ",")) != NULL) { + + n = 0; + if (KWPREFIXMATCH(p, UCP_KW_CMASK "=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return (-1); + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return (-1); + pmc_config->pm_caps |= PMC_CAP_THRESHOLD; + pmc_config->pm_md.pm_ucp.pm_ucp_config |= + UCP_CMASK(count); + } else if (KWMATCH(p, UCP_KW_EDGE)) { + pmc_config->pm_caps |= PMC_CAP_EDGE; + } else if (KWMATCH(p, UCP_KW_INV)) { + pmc_config->pm_caps |= PMC_CAP_INVERT; + } else + return (-1); + + if (n < 0) /* Parsing failed. */ + return (-1); + } return (0); } @@ -2309,6 +2424,31 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, ev = corei7_event_table; count = PMC_EVENT_TABLE_SIZE(corei7); break; + case PMC_CPU_INTEL_WESTMERE: + ev = westmere_event_table; + count = PMC_EVENT_TABLE_SIZE(westmere); + break; + } + break; + case PMC_CLASS_UCF: + ev = ucf_event_table; + count = PMC_EVENT_TABLE_SIZE(ucf); + break; + case PMC_CLASS_UCP: + /* + * Return the most appropriate set of event name + * spellings for the current CPU. + */ + switch (cpu_info.pm_cputype) { + default: + case PMC_CPU_INTEL_COREI7: + ev = corei7uc_event_table; + count = PMC_EVENT_TABLE_SIZE(corei7uc); + break; + case PMC_CPU_INTEL_WESTMERE: + ev = westmereuc_event_table; + count = PMC_EVENT_TABLE_SIZE(westmereuc); + break; } break; case PMC_CLASS_TSC: @@ -2514,8 +2654,15 @@ pmc_init(void) PMC_MDEP_INIT_INTEL_V2(core2); break; case PMC_CPU_INTEL_COREI7: + pmc_class_table[n++] = &ucf_class_table_descr; + pmc_class_table[n++] = &corei7uc_class_table_descr; PMC_MDEP_INIT_INTEL_V2(corei7); break; + case PMC_CPU_INTEL_WESTMERE: + pmc_class_table[n++] = &ucf_class_table_descr; + pmc_class_table[n++] = &westmereuc_class_table_descr; + PMC_MDEP_INIT_INTEL_V2(westmere); + break; case PMC_CPU_INTEL_PIV: PMC_MDEP_INIT(p4); pmc_class_table[n] = &p4_class_table_descr; @@ -2618,10 +2765,30 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) ev = corei7_event_table; evfence = corei7_event_table + PMC_EVENT_TABLE_SIZE(corei7); break; + case PMC_CPU_INTEL_WESTMERE: + ev = westmere_event_table; + evfence = westmere_event_table + PMC_EVENT_TABLE_SIZE(westmere); + break; + default: /* Unknown CPU type. 
 */
+			break;
+		}
-	} if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
+	} else if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
 		ev = k7_event_table;
 		evfence = k7_event_table + PMC_EVENT_TABLE_SIZE(k7);
 	} else if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {
diff --git a/lib/libpmc/pmc.corei7.3 b/lib/libpmc/pmc.corei7.3
new file mode 100644
index 0000000..1a8b9d9
--- /dev/null
+++ b/lib/libpmc/pmc.corei7.3
@@ -0,0 +1,1581 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.COREI7 3
+.Sh NAME
+.Nm pmc.corei7
+.Nd measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss COREI7 AND XEON 5500 PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit the L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit the L3 cache in the uncore
+and were serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit the L3 cache in the uncore
+and were serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: locally homed requests that missed the L3 cache and were serviced
+by forwarded data following a cross package snoop where no modified
+copies were found.  (Remote home requests are not counted.)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by the IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+Counts the number of store buffer drains.
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code.
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing.
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk.
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits.
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.PDP_MISS
+.Pq Event 08H , Umask 40H
+Number of DTLB cache load misses where the high part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 08H , Umask 80H
+Counts number of completed large page walks due to load miss in the STLB.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible load
+retired on the architected path.
+In conjunction with ld_lat facility.
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility.
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+the ld_lat facility.
+In conjunction with ld_lat facility.
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not count
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles in which no Uops were issued by the Register
+Allocation Table to the Reservation Station, i.e. from the front end to the
+back end.
+Set invert=1, cmask=1.
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.L3_DATA_MISS_UNKNOWN
+.Pq Event 0FH , Umask 01H
+Counts number of memory load instructions retired where the memory reference
+missed L3 and data source is unknown.
+Available only for CPUID signature 06_2EH.
+.It Li MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM
+.Pq Event 0FH , Umask 02H
+Counts number of memory load instructions retired where the memory reference
+hit modified data in a sibling core residing on the same socket.
+.It Li MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT
+.Pq Event 0FH , Umask 08H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and HIT in a remote socket's cache. Only
+counts locally homed lines.
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 10H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and was remotely homed. This includes both
+DRAM access and HITM in a remote socket's cache for remotely homed lines.
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 20H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and required a local socket memory
+reference. This includes locally homed cachelines that were in a modified
+state in another socket.
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and to perform I/O.
+Available only for CPUID signature 06_2EH.
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTs, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge=1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect when SMT is on.
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect when SMT is on.
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop.
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded.
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed.
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Counts number of loops that cannot stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache.
L2 RFO +requests include both L1D demand RFO misses as well as L1D RFO prefetches. +.It Li L2_RQSTS.RFOS +.Pq Event 24H , Umask 0CH +Counts all L2 store RFO requests. L2 RFO requests include both L1D demand +RFO misses as well as L1D RFO prefetches. +.It Li L2_RQSTS.IFETCH_HIT +.Pq Event 24H , Umask 10H +Counts number of instruction fetches that hit the L2 cache. L2 instruction +fetches include both L1I demand misses as well as L1I instruction +prefetches. +.It Li L2_RQSTS.IFETCH_MISS +.Pq Event 24H , Umask 20H +Counts number of instruction fetches that miss the L2 cache. L2 instruction +fetches include both L1I demand misses as well as L1I instruction +prefetches. +.It Li L2_RQSTS.IFETCHES +.Pq Event 24H , Umask 30H +Counts all instruction fetches. L2 instruction fetches include both L1I +demand misses as well as L1I instruction prefetches. +.It Li L2_RQSTS.PREFETCH_HIT +.Pq Event 24H , Umask 40H +Counts L2 prefetch hits for both code and data. +.It Li L2_RQSTS.PREFETCH_MISS +.Pq Event 24H , Umask 80H +Counts L2 prefetch misses for both code and data. +.It Li L2_RQSTS.PREFETCHES +.Pq Event 24H , Umask C0H +Counts all L2 prefetches for both code and data. +.It Li L2_RQSTS.MISS +.Pq Event 24H , Umask AAH +Counts all L2 misses for both code and data. +.It Li L2_RQSTS.REFERENCES +.Pq Event 24H , Umask FFH +Counts all L2 requests for both code and data. +.It Li L2_DATA_RQSTS.DEMAND.I_STATE +.Pq Event 26H , Umask 01H +Counts number of L2 data demand loads where the cache line to be loaded is +in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D +demand misses and L1D prefetches. +.It Li L2_DATA_RQSTS.DEMAND.S_STATE +.Pq Event 26H , Umask 02H +Counts number of L2 data demand loads where the cache line to be loaded is +in the S (shared) state. L2 demand loads are both L1D demand misses and L1D +prefetches. +.It Li L2_DATA_RQSTS.DEMAND.E_STATE +.Pq Event 26H , Umask 04H +Counts number of L2 data demand loads where the cache line to be loaded is +in the E (exclusive) state. L2 demand loads are both L1D demand misses and +L1D prefetches. +.It Li L2_DATA_RQSTS.DEMAND.M_STATE +.Pq Event 26H , Umask 08H +Counts number of L2 data demand loads where the cache line to be loaded is +in the M (modified) state. L2 demand loads are both L1D demand misses and +L1D prefetches. +.It Li L2_DATA_RQSTS.DEMAND.MESI +.Pq Event 26H , Umask 0FH +Counts all L2 data demand requests. L2 demand loads are both L1D demand +misses and L1D prefetches. +.It Li L2_DATA_RQSTS.PREFETCH.I_STATE +.Pq Event 26H , Umask 10H +Counts number of L2 prefetch data loads where the cache line to be loaded is +in the I (invalid) state, i.e. a cache miss. +.It Li L2_DATA_RQSTS.PREFETCH.S_STATE +.Pq Event 26H , Umask 20H +Counts number of L2 prefetch data loads where the cache line to be loaded is +in the S (shared) state. A prefetch RFO will miss on an S state line, while +a prefetch read will hit on an S state line. +.It Li L2_DATA_RQSTS.PREFETCH.E_STATE +.Pq Event 26H , Umask 40H +Counts number of L2 prefetch data loads where the cache line to be loaded is +in the E (exclusive) state. +.It Li L2_DATA_RQSTS.PREFETCH.M_STATE +.Pq Event 26H , Umask 80H +Counts number of L2 prefetch data loads where the cache line to be loaded is +in the M (modified) state. +.It Li L2_DATA_RQSTS.PREFETCH.MESI +.Pq Event 26H , Umask F0H +Counts all L2 prefetch requests. +.It Li L2_DATA_RQSTS.ANY +.Pq Event 26H , Umask FFH +Counts all L2 data requests. 
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 4FH
+This event counts requests originating from the core that reference a cache
+line in the last level cache. The event count includes speculative traffic
+but excludes cache line fills due to an L2 hardware-prefetch. Because cache
+hierarchy, cache sizes and other implementation-specific characteristics
+vary, value comparison to estimate performance differences is not
+recommended.
+See Table A-1.
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+This event counts each cache miss condition for references to the last level
+cache. The event count may include speculative traffic but excludes cache
+line fills due to L2 hardware-prefetches. Because cache hierarchy, cache
+sizes and other implementation-specific characteristics vary, value
+comparison to estimate performance differences is not recommended.
+See Table A-1.
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+See Table A-1.
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+See Table A-1.
+.It Li L1D_CACHE_LD.I_STATE
+.Pq Event 40H , Umask 01H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the I (invalid) state, i.e. the read request missed the cache.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.S_STATE
+.Pq Event 40H , Umask 02H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.E_STATE
+.Pq Event 40H , Umask 04H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.M_STATE
+.Pq Event 40H , Umask 08H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.MESI
+.Pq Event 40H , Umask 0FH
+Counts L1 data cache read requests.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.S_STATE
+.Pq Event 41H , Umask 02H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.E_STATE
+.Pq Event 41H , Umask 04H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.M_STATE
+.Pq Event 41H , Umask 08H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.HIT
+.Pq Event 42H , Umask 01H
+Counts retired load locks that hit in the L1 data cache or hit in an already
+allocated fill buffer. The lock portion of the load lock transaction must
+hit in the L1D.
+The initial load will pull the lock into the L1 data cache. Counter 0, 1
+only
+.It Li L1D_CACHE_LOCK.S_STATE
+.Pq Event 42H , Umask 02H
+Counts L1 data cache retired load locks that hit the target cache line in
+the shared state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.E_STATE
+.Pq Event 42H , Umask 04H
+Counts L1 data cache retired load locks that hit the target cache line in
+the exclusive state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.M_STATE
+.Pq Event 42H , Umask 08H
+Counts L1 data cache retired load locks that hit the target cache line in
+the modified state.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.ANY
+.Pq Event 43H , Umask 01H
+Counts all references (uncached, speculated and retired) to the L1 data
+cache, including all loads and stores with any memory types. The event
+counts memory accesses only when they are actually performed. For example, a
+load blocked by unknown store address and later performed is only counted
+once.
+The event does not include non-memory accesses, such as I/O accesses.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.CACHEABLE
+.Pq Event 43H , Umask 02H
+Counts all data reads and writes (speculated and retired) from cacheable
+memory, including locked operations.
+Counter 0, 1 only +.It Li L1D_PEND_MISS.LOAD_BUFFERS_FULL +.Pq Event 48H , Umask 02H +Counts cycles of L1 data cache load fill buffers full. +Counter 0, 1 only +.It Li DTLB_MISSES.ANY +.Pq Event 49H , Umask 01H +Counts the number of misses in the STLB which causes a page walk. +.It Li DTLB_MISSES.WALK_COMPLETED +.Pq Event 49H , Umask 02H +Counts number of misses in the STLB which resulted in a completed page walk. +.It Li DTLB_MISSES.STLB_HIT +.Pq Event 49H , Umask 10H +Counts the number of DTLB first level misses that hit in the second level +TLB. This event is only relevant if the core contains multiple DTLB levels. +.It Li LOAD_HIT_PRE +.Pq Event 4CH , Umask 01H +Counts load operations sent to the L1 data cache while a previous SSE +prefetch instruction to the same cache line has started prefetching but has +not yet finished. +.It Li L1D_PREFETCH.REQUESTS +.Pq Event 4EH , Umask 01H +Counts number of hardware prefetch requests dispatched out of the prefetch +FIFO. +.It Li L1D_PREFETCH.MISS +.Pq Event 4EH , Umask 02H +Counts number of hardware prefetch requests that miss the L1D. There are two +prefetchers in the L1D. A streamer, which predicts lines sequentially after +this one should be fetched, and the IP prefetcher that remembers access +patterns for the current instruction. The streamer prefetcher stops on an +L1D hit, while the IP prefetcher does not. +.It Li L1D_PREFETCH.TRIGGERS +.Pq Event 4EH , Umask 04H +Counts number of prefetch requests triggered by the Finite State Machine and +pushed into the prefetch FIFO. Some of the prefetch requests are dropped due +to overwrites or competition between the IP index prefetcher and streamer +prefetcher. The prefetch FIFO contains 4 entries. +.It Li L1D.REPL +.Pq Event 51H , Umask 01H +Counts the number of lines brought into the L1 data cache. +Counter 0, 1 only +.It Li L1D.M_REPL +.Pq Event 51H , Umask 02H +Counts the number of modified lines brought into the L1 data cache. +Counter 0, 1 only +.It Li L1D.M_EVICT +.Pq Event 51H , Umask 04H +Counts the number of modified lines evicted from the L1 data cache due to +replacement. +Counter 0, 1 only +.It Li L1D.M_SNOOP_EVICT +.Pq Event 51H , Umask 08H +Counts the number of modified lines evicted from the L1 data cache due to +snoop HITM intervention. +Counter 0, 1 only +.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT +.Pq Event 52H , Umask 01H +Counts the number of cacheable load lock speculated instructions accepted +into the fill buffer. +.It Li L1D_CACHE_LOCK_FB_HIT +.Pq Event 53H , Umask 01H +Counts the number of cacheable load lock speculated or retired instructions +accepted into the fill buffer. +.It Li CACHE_LOCK_CYCLES.L1D_L2 +.Pq Event 63H , Umask 01H +Cycle count during which the L1D and L2 are locked. A lock is asserted when +there is a locked memory access, due to uncacheable memory, a locked +operation that spans two cache lines, or a page walk from an uncacheable +page table. +Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and +it is highly recommended to avoid such accesses. +.It Li CACHE_LOCK_CYCLES.L1D +.Pq Event 63H , Umask 02H +Counts the number of cycles that cacheline in the L1 data cache unit is +locked. +Counter 0, 1 only. +.It Li IO_TRANSACTIONS +.Pq Event 6CH , Umask 01H +Counts the number of completed I/O transactions. +.It Li L1I.HITS +.Pq Event 80H , Umask 01H +Counts all instruction fetches that hit the L1 instruction cache. +.It Li L1I.MISSES +.Pq Event 80H , Umask 02H +Counts all instruction fetches that miss the L1I cache. 
This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to an L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which causes a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a near return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls executed, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed, and stalls arising while the store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire).
+When the RS is full, new instructions can not enter the reservation station
+and start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re-order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring too close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediction direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by the loop stream detector.
+Use cmask=1 and invert to count cycles.
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes.
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that were issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts cycles when the Uops executed were issued from any ports except port
+5. Use Cmask=1 for active cycles; Cmask=0 for weighted cycles; Use CMask=1,
+Invert=1 to count P0-4 stalled cycles; Use Cmask=1, Edge=1, Invert=1 to
+count P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that were issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts cycles when the Uops are executing. Use Cmask=1 for active cycles;
+Cmask=0 for weighted cycles; Use CMask=1, Invert=1 to count P0-4 stalled
+cycles; Use Cmask=1, Edge=1, Invert=1 to count P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that were issued on port 0, 1, or 5.
+Use cmask=1, invert=1 to count stall cycles.
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that were issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A6H.
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A7H.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1.
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of MMX instructions retired.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired (macro-fused=1, micro-fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles.
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle.
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles a machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a severe penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3 caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1.
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD scalar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean and modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and whose address is
+located in an allocated line fill buffer and will soon be committed to
+cache. This is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push, pop, call,
+ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP.
Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by a previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe; cycles when partial register stalls occurred; cycles when
+flag stalls occurred; and cycles when floating-point unit (FPU) status word
+stalls occurred. To count each of these conditions separately use the
+events: RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front-end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to a floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediction Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline.
The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to a 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The BPU clear leads to a 3 cycle bubble in the Front End.
+.It Li BPU_CLEARS.ANY
+.Pq Event E8H , Umask 03H
+Counts all BPU clears.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache lines evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache lines evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions (denormal input when the DAZ flag is off, or underflow
+result when the FTZ flag is off); x87 instructions (NaN or denormal loaded
+to a register or used as input from memory, division by 0, or underflow
+output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assists when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assists when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SIMD integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SIMD integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SIMD integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SIMD integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SIMD integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SIMD integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SIMD integer 64 bit shuffle or move operations.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs also support the following events,
+which were listed in the June 2009 reference documentation but removed in
+the December 2009 revision:
+.Bl -tag -width indent
+.It Li SB_FORWARD.ANY
+.Pq Event 02H , Umask 01H
+Counts the number of store forwards.
+.It Li LOAD_BLOCK.STD
+.Pq Event 03H , Umask 01H
+Counts the number of loads blocked by a preceding store with unknown data.
+.It Li LOAD_BLOCK.ADDRESS_OFFSET
+.Pq Event 03H , Umask 04H
+Counts the number of loads blocked by a preceding store address.
+.It Li LOAD_BLOCK.ADDRESS_OFFSET
+.Pq Event 01H , Umask 04H
+Counts the cycles of store buffer drains.
+.It Li MISALIGN_MEM_REF.LOAD
+.Pq Event 05H , Umask 01H
+Counts the number of misaligned load references.
+.It Li MISALIGN_MEM_REF.STORE
+.Pq Event 05H , Umask 02H
+Counts the number of misaligned store references.
+.It Li MISALIGN_MEM_REF.ANY
+.Pq Event 05H , Umask 03H
+Counts the number of misaligned memory references.
+.It Li STORE_BLOCKS.NOT_STA
+.Pq Event 06H , Umask 01H
+This event counts the number of load operations delayed by preceding
+stores whose addresses are known but whose data is unknown, and by preceding
+stores that conflict with the load but which incompletely overlap the load.
+.It Li STORE_BLOCKS.STA
+.Pq Event 06H , Umask 02H
+This event counts load operations delayed by preceding stores whose
+addresses are unknown (STA block).
+.It Li STORE_BLOCKS.ANY
+.Pq Event 06H , Umask 0FH
+All loads delayed due to store blocks.
+.It Li MEMORY_DISAMBIGURATION.RESET
+.Pq Event 09H , Umask 01H
+Counts memory disambiguation reset cycles.
+.It Li MEMORY_DISAMBIGURATION.SUCCESS
+.Pq Event 09H , Umask 02H
+Counts the number of loads for which memory disambiguation succeeded.
+.It Li MEMORY_DISAMBIGURATION.WATCHDOG
+.Pq Event 09H , Umask 04H
+Counts the number of times the memory disambiguation watchdog kicked in.
+.It Li MEMORY_DISAMBIGURATION.WATCH_CYCLES
+.Pq Event 09H , Umask 08H
+Counts the cycles that the memory disambiguation watchdog is active.
+Set invert=1, cmask=1.
+.It Li HW_INT.RCV
+.Pq Event 1DH , Umask 01H
+Number of interrupts received.
+.It Li HW_INT.CYCLES_MASKED
+.Pq Event 1DH , Umask 02H
+Number of cycles interrupts are masked.
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 1DH , Umask 04H
+Number of cycles interrupts are pending and masked.
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 04H , Umask 04H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the E (exclusive) state. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request.
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 27H , Umask 04H
+LONGEST_LAT_CACHE.MISS
+.It Li UOPS_DECODED.DEC0
+.Pq Event 3DH , Umask 01H
+Counts micro-ops decoded by decoder 0.
+.It Li UOPS_DECODED.DEC0
+.Pq Event 01H , Umask 01H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the I state.
+Counter 0, 1 only.
+.It Li L1D_CACHE_ST.MESI
+.Pq Event 41H , Umask 0FH
+Counts L1 data cache store RFO requests.
+Counter 0, 1 only.
+.It Li DTLB_MISSES.PDE_MISS
+.Pq Event 49H , Umask 20H
+Number of DTLB cache misses where the low part of the linear to physical
+address translation was missed.
+.It Li DTLB_MISSES.PDP_MISS
+.Pq Event 49H , Umask 40H
+Number of DTLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li SSE_MEM_EXEC.NTA
+.Pq Event 4BH , Umask 01H
+Counts number of SSE NTA prefetch/weakly-ordered instructions which missed
+the L1 data cache.
+.It Li SSE_MEM_EXEC.STREAMING_STORES
+.Pq Event 4BH , Umask 08H
+Counts number of SSE non-temporal stores.
+.It Li SFENCE_CYCLES
+.Pq Event 4DH , Umask 01H
+Counts store fence cycles.
+.It Li EPT.EPDE_MISS
+.Pq Event 4FH , Umask 02H
+Counts Extended Page Directory Entry misses. The Extended Page Directory
+cache is used by Virtual Machine operating systems while the guest operating
+systems use the standard TLB caches.
+.It Li EPT.EPDPE_HIT
+.Pq Event 4FH , Umask 04H
+Counts Extended Page Directory Pointer Entry hits.
+.It Li EPT.EPDPE_MISS
+.Pq Event 4FH , Umask 08H
+Counts Extended Page Directory Pointer Entry misses.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+Counter 0 only.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+Counter 0 only.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+Counter 0 only.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Includes L2
+prefetch requests.
+Counter 0 only.
+.It Li IFU_IVC.FULL
+.Pq Event 81H , Umask 01H
+Instruction Fetch unit victim cache full.
+.It Li IFU_IVC.L1I_EVICTION
+.Pq Event 81H , Umask 02H
+L1 Instruction cache evictions.
+.It Li L1I_OPPORTUNISTIC_HITS
+.Pq Event 83H , Umask 01H
+Opportunistic hits in streaming.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.PMH_BUSY_CYCLES
+.Pq Event 85H , Umask 04H
+Counts PMH busy cycles.
+.It Li ITLB_MISSES.STLB_HIT
+.Pq Event 85H , Umask 10H
+Counts the number of ITLB misses that hit in the second level TLB.
+.It Li ITLB_MISSES.PDE_MISS
+.Pq Event 85H , Umask 20H
+Number of ITLB misses where the low part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.PDP_MISS
+.Pq Event 85H , Umask 40H
+Number of ITLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.UNCACHED_MEM
+.Pq Event B0H , Umask 20H
+Counts number of offcore uncached memory requests.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li PIC_ACCESSES.TPR_READS
+.Pq Event BAH , Umask 01H
+Counts number of TPR reads.
+.It Li PIC_ACCESSES.TPR_WRITES
+.Pq Event BAH , Umask 02H
+Counts number of TPR writes.
+.It Li MACHINE_CLEARS.FUSION_ASSIST
+.Pq Event C3H , Umask 10H
+Counts the number of macro-fusion assists.
+.It Li BOGUS_BR
+.Pq Event E4H , Umask 01H
+Counts the number of bogus branches.
+.It Li L2_HW_PREFETCH.HIT
+.Pq Event F3H , Umask 01H
+Counts L2 HW prefetcher detector hits.
+.It Li L2_HW_PREFETCH.ALLOC
+.Pq Event F3H , Umask 02H
+Counts L2 HW prefetcher allocations.
+.It Li L2_HW_PREFETCH.DATA_TRIGGER
+.Pq Event F3H , Umask 04H
+Counts L2 HW data prefetcher triggered.
+.It Li L2_HW_PREFETCH.CODE_TRIGGER
+.Pq Event F3H , Umask 08H
+Counts L2 HW code prefetcher triggered.
+.It Li L2_HW_PREFETCH.DCA_TRIGGER
+.Pq Event F3H , Umask 10H
+Counts L2 HW DCA prefetcher triggered.
+.It Li L2_HW_PREFETCH.KICK_START
+.Pq Event F3H , Umask 20H
+Counts L2 HW prefetcher kick starts.
+.It Li SQ_MISC.PROMOTION
+.Pq Event F4H , Umask 01H
+Counts the number of L2 secondary misses that hit the Super Queue.
+.It Li SQ_MISC.PROMOTION_POST_GO
+.Pq Event F4H , Umask 02H
+Counts the number of L2 secondary misses during the Super Queue filling L2.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.FILL_DROPPED
+.Pq Event F4H , Umask 08H
+Counts the number of SQ L2 fills dropped due to L2 busy.
+.It Li SEGMENT_REG_LOADS
+.Pq Event F8H , Umask 01H
+Counts number of segment register loads.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.corei7uc.3 b/lib/libpmc/pmc.corei7uc.3
new file mode 100644
index 0000000..2c1b3dd
--- /dev/null
+++ b/lib/libpmc/pmc.corei7uc.3
@@ -0,0 +1,880 @@
+.\" Copyright (c) 2010 Fabien Thomas. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed. in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Dt PMC.COREI7UC 3
+.Os
+.Sh NAME
+.Nm pmc.corei7uc
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 ;
+a minimal example of this run-time discovery is shown below.
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
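+.Pp
+The following fragment is a minimal sketch of such run-time discovery;
+it also allocates, starts, and reads one of the programmable uncore
+events described in the next subsection (the event and its cmask
+qualifier were chosen arbitrarily for illustration, and error handling
+is abbreviated):
+.Bd -literal -offset indent
+#include <sys/types.h>
+
+#include <err.h>
+#include <pmc.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int
+main(void)
+{
+        const struct pmc_cpuinfo *ci;
+        pmc_id_t pmcid;
+        pmc_value_t v;
+        unsigned int i;
+
+        if (pmc_init() < 0)
+                err(1, "pmc_init");
+
+        /* Discover the number and width of PMCs in each class. */
+        if (pmc_cpuinfo(&ci) < 0)
+                err(1, "pmc_cpuinfo");
+        for (i = 0; i < ci->pm_nclass; i++)
+                printf("class %u: %u PMCs, %u bits wide\en", i,
+                    ci->pm_classes[i].pm_num,
+                    ci->pm_classes[i].pm_width);
+
+        /*
+         * Count an uncore event on CPU 0 in system-wide counting
+         * mode.  The event name is taken from the event list below
+         * and is used here purely as an illustration.
+         */
+        if (pmc_allocate("GQ_CYCLES_FULL.READ_TRACKER,cmask=1",
+            PMC_MODE_SC, 0, 0, &pmcid) < 0)
+                err(1, "pmc_allocate");
+        if (pmc_start(pmcid) < 0)
+                err(1, "pmc_start");
+        sleep(1);
+        if (pmc_read(pmcid, &v) < 0)
+                err(1, "pmc_read");
+        printf("count: %ju\en", (uintmax_t)v);
+        pmc_release(pmcid);
+        return (0);
+}
+.Ed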
+.Ss COREI7 AND XEON 5500 UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles where the Global Queue read tracker has at least one valid
+entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles where the Global Queue write tracker has at least one valid
+entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles where the Global Queue peer probe tracker has at least one
+valid entry. The peer probe tracker queue tracks IOH and remote socket
+snoops.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of read tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by this count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number of GQ read tracker entries for which a full cache line
+read has missed the L3. The GQ read tracker L3 miss to fill occupancy count
+is divided by this count to obtain the average cache line read L3 miss
+latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID. The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state.
The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to the remote home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home.
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3.
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when the line was forwarded in the M
+state due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH tracker are
+full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles where the Quickpath Home Logic IOH tracker is busy.
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles where the Quickpath Home Logic remote tracker is busy.
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles where the Quickpath Home Logic local tracker is busy.
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number of requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_NORMAL_FULL.READ.CH0
+.Pq Event 27H , Umask 01H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH1
+.Pq Event 27H , Umask 02H
+Uncore cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH2
+.Pq Event 27H , Umask 04H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH0
+.Pq Event 27H , Umask 08H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH1
+.Pq Event 27H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH2
+.Pq Event 27H , Umask 20H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1 +.Pq Event 29H , Umask 02H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +read request to DRAM channel 1. +.It Li QMC_BUSY.READ.CH2 +.Pq Event 29H , Umask 04H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +read request to DRAM channel 2. +.It Li QMC_BUSY.WRITE.CH0 +.Pq Event 29H , Umask 08H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +write request to DRAM channel 0. +.It Li QMC_BUSY.WRITE.CH1 +.Pq Event 29H , Umask 10H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +write request to DRAM channel 1. +.It Li QMC_BUSY.WRITE.CH2 +.Pq Event 29H , Umask 20H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +write request to DRAM channel 2. +.It Li QMC_OCCUPANCY.CH0 +.Pq Event 2AH , Umask 01H +IMC channel 0 normal read request occupancy. +.It Li QMC_OCCUPANCY.CH1 +.Pq Event 2AH , Umask 02H +IMC channel 1 normal read request occupancy. +.It Li QMC_OCCUPANCY.CH2 +.Pq Event 2AH , Umask 04H +IMC channel 2 normal read request occupancy. +.It Li QMC_ISSOC_OCCUPANCY.CH0 +.Pq Event 2BH , Umask 01H +IMC channel 0 issoc read request occupancy. +.It Li QMC_ISSOC_OCCUPANCY.CH1 +.Pq Event 2BH , Umask 02H +IMC channel 1 issoc read request occupancy. +.It Li QMC_ISSOC_OCCUPANCY.CH2 +.Pq Event 2BH , Umask 04H +IMC channel 2 issoc read request occupancy. +.It Li QMC_ISSOC_READS.ANY +.Pq Event 2BH , Umask 07H +IMC issoc read request occupancy. +.It Li QMC_NORMAL_READS.CH0 +.Pq Event 2CH , Umask 01H +Counts the number of Quickpath Memory Controller channel 0 medium and low +priority read requests. The QMC channel 0 normal read occupancy divided by +this count provides the average QMC channel 0 read latency. +.It Li QMC_NORMAL_READS.CH1 +.Pq Event 2CH , Umask 02H +Counts the number of Quickpath Memory Controller channel 1 medium and low +priority read requests. The QMC channel 1 normal read occupancy divided by +this count provides the average QMC channel 1 read latency. +.It Li QMC_NORMAL_READS.CH2 +.Pq Event 2CH , Umask 04H +Counts the number of Quickpath Memory Controller channel 2 medium and low +priority read requests. The QMC channel 2 normal read occupancy divided by +this count provides the average QMC channel 2 read latency. +.It Li QMC_NORMAL_READS.ANY +.Pq Event 2CH , Umask 07H +Counts the number of Quickpath Memory Controller medium and low priority +read requests. The QMC normal read occupancy divided by this count provides +the average QMC read latency. +.It Li QMC_HIGH_PRIORITY_READS.CH0 +.Pq Event 2DH , Umask 01H +Counts the number of Quickpath Memory Controller channel 0 high priority +isochronous read requests. +.It Li QMC_HIGH_PRIORITY_READS.CH1 +.Pq Event 2DH , Umask 02H +Counts the number of Quickpath Memory Controller channel 1 high priority +isochronous read requests. +.It Li QMC_HIGH_PRIORITY_READS.CH2 +.Pq Event 2DH , Umask 04H +Counts the number of Quickpath Memory Controller channel 2 high priority +isochronous read requests. +.It Li QMC_HIGH_PRIORITY_READS.ANY +.Pq Event 2DH , Umask 07H +Counts the number of Quickpath Memory Controller high priority isochronous +read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.CH0 +.Pq Event 2EH , Umask 01H +Counts the number of Quickpath Memory Controller channel 0 critical priority +isochronous read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.CH1 +.Pq Event 2EH , Umask 02H +Counts the number of Quickpath Memory Controller channel 1 critical priority +isochronous read requests. 
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2 +.Pq Event 2EH , Umask 04H +Counts the number of Quickpath Memory Controller channel 2 critical priority +isochronous read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.ANY +.Pq Event 2EH , Umask 07H +Counts the number of Quickpath Memory Controller critical priority +isochronous read requests. +.It Li QMC_WRITES.FULL.CH0 +.Pq Event 2FH , Umask 01H +Counts number of full cache line writes to DRAM channel 0. +.It Li QMC_WRITES.FULL.CH1 +.Pq Event 2FH , Umask 02H +Counts number of full cache line writes to DRAM channel 1. +.It Li QMC_WRITES.FULL.CH2 +.Pq Event 2FH , Umask 04H +Counts number of full cache line writes to DRAM channel 2. +.It Li QMC_WRITES.FULL.ANY +.Pq Event 2FH , Umask 07H +Counts number of full cache line writes to DRAM. +.It Li QMC_WRITES.PARTIAL.CH0 +.Pq Event 2FH , Umask 08H +Counts number of partial cache line writes to DRAM channel 0. +.It Li QMC_WRITES.PARTIAL.CH1 +.Pq Event 2FH , Umask 10H +Counts number of partial cache line writes to DRAM channel 1. +.It Li QMC_WRITES.PARTIAL.CH2 +.Pq Event 2FH , Umask 20H +Counts number of partial cache line writes to DRAM channel 2. +.It Li QMC_WRITES.PARTIAL.ANY +.Pq Event 2FH , Umask 38H +Counts number of partial cache line writes to DRAM. +.It Li QMC_CANCEL.CH0 +.Pq Event 30H , Umask 01H +Counts number of DRAM channel 0 cancel requests. +.It Li QMC_CANCEL.CH1 +.Pq Event 30H , Umask 02H +Counts number of DRAM channel 1 cancel requests. +.It Li QMC_CANCEL.CH2 +.Pq Event 30H , Umask 04H +Counts number of DRAM channel 2 cancel requests. +.It Li QMC_CANCEL.ANY +.Pq Event 30H , Umask 07H +Counts number of DRAM cancel requests. +.It Li QMC_PRIORITY_UPDATES.CH0 +.Pq Event 31H , Umask 01H +Counts number of DRAM channel 0 priority updates. A priority update occurs +when an ISOC high or critical request is received by the QHL and there is a +matching request with normal priority that has already been issued to the +QMC. In this instance, the QHL will send a priority update to QMC to +expedite the request. +.It Li QMC_PRIORITY_UPDATES.CH1 +.Pq Event 31H , Umask 02H +Counts number of DRAM channel 1 priority updates. A priority update occurs +when an ISOC high or critical request is received by the QHL and there is a +matching request with normal priority that has already been issued to the +QMC. In this instance, the QHL will send a priority update to QMC to +expedite the request. +.It Li QMC_PRIORITY_UPDATES.CH2 +.Pq Event 31H , Umask 04H +Counts number of DRAM channel 2 priority updates. A priority update occurs +when an ISOC high or critical request is received by the QHL and there is a +matching request with normal priority that has already been issued to the +QMC. In this instance, the QHL will send a priority update to QMC to +expedite the request. +.It Li QMC_PRIORITY_UPDATES.ANY +.Pq Event 31H , Umask 07H +Counts number of DRAM priority updates. A priority update occurs when an +ISOC high or critical request is received by the QHL and there is a matching +request with normal priority that has already been issued to the QMC. In +this instance, the QHL will send a priority update to QMC to expedite the +request. +.It Li QHL_FRC_ACK_CNFLTS.LOCAL +.Pq Event 33H , Umask 04H +Counts number of Force Acknowledge Conflict messages sent by the Quickpath +Home Logic to the local home. +.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0 +.Pq Event 40H , Umask 01H +Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled +due to lack of a VNA and VN0 credit. 
Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0 +.Pq Event 40H , Umask 02H +Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0 +.Pq Event 40H , Umask 04H +Counts cycles the Quickpath outbound link 0 non-data response virtual +channel is stalled due to lack of a VNA and VN0 credit. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1 +.Pq Event 40H , Umask 08H +Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1 +.Pq Event 40H , Umask 10H +Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1 +.Pq Event 40H , Umask 20H +Counts cycles the Quickpath outbound link 1 non-data response virtual +channel is stalled due to lack of a VNA and VN0 credit. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0 +.Pq Event 40H , Umask 07H +Counts cycles the Quickpath outbound link 0 virtual channels are stalled due +to lack of a VNA and VN0 credit. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1 +.Pq Event 40H , Umask 38H +Counts cycles the Quickpath outbound link 1 virtual channels are stalled due +to lack of a VNA and VN0 credit. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0 +.Pq Event 41H , Umask 01H +Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is +stalled due to lack of VNA and VN0 credits. Note that this event does not +filter out when a flit would not have been selected for arbitration because +another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0 +.Pq Event 41H , Umask 02H +Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0 +.Pq Event 41H , Umask 04H +Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual +channel is stalled due to lack of VNA and VN0 credits. 
Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1 +.Pq Event 41H , Umask 08H +Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is +stalled due to lack of VNA and VN0 credits. Note that this event does not +filter out when a flit would not have been selected for arbitration because +another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1 +.Pq Event 41H , Umask 10H +Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1 +.Pq Event 41H , Umask 20H +Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0 +.Pq Event 41H , Umask 07H +Counts cycles the Quickpath outbound link 0 virtual channels are stalled due +to lack of VNA and VN0 credits. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1 +.Pq Event 41H , Umask 38H +Counts cycles the Quickpath outbound link 1 virtual channels are stalled due +to lack of VNA and VN0 credits. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_HEADER.BUSY.LINK_0 +.Pq Event 42H , Umask 02H +Number of cycles that the header buffer in the Quickpath Interface outbound +link 0 is busy. +.It Li QPI_TX_HEADER.BUSY.LINK_1 +.Pq Event 42H , Umask 08H +Number of cycles that the header buffer in the Quickpath Interface outbound +link 1 is busy. +.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0 +.Pq Event 43H , Umask 01H +Number of cycles that snoop packets incoming to the Quickpath Interface link +0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) +does not have any available entries. +.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1 +.Pq Event 43H , Umask 02H +Number of cycles that snoop packets incoming to the Quickpath Interface link +1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) +does not have any available entries. +.It Li DRAM_OPEN.CH0 +.Pq Event 60H , Umask 01H +Counts number of DRAM Channel 0 open commands issued either for read or +write. To read or write data, the referenced DRAM page must first be opened. +.It Li DRAM_OPEN.CH1 +.Pq Event 60H , Umask 02H +Counts number of DRAM Channel 1 open commands issued either for read or +write. To read or write data, the referenced DRAM page must first be opened. +.It Li DRAM_OPEN.CH2 +.Pq Event 60H , Umask 04H +Counts number of DRAM Channel 2 open commands issued either for read or +write. To read or write data, the referenced DRAM page must first be opened. +.It Li DRAM_PAGE_CLOSE.CH0 +.Pq Event 61H , Umask 01H +DRAM channel 0 command issued to CLOSE a page due to page idle timer +expiration. Closing a page is done by issuing a precharge. 
+.It Li DRAM_PAGE_CLOSE.CH1 +.Pq Event 61H , Umask 02H +DRAM channel 1 command issued to CLOSE a page due to page idle timer +expiration. Closing a page is done by issuing a precharge. +.It Li DRAM_PAGE_CLOSE.CH2 +.Pq Event 61H , Umask 04H +DRAM channel 2 command issued to CLOSE a page due to page idle timer +expiration. Closing a page is done by issuing a precharge. +.It Li DRAM_PAGE_MISS.CH0 +.Pq Event 62H , Umask 01H +Counts the number of precharges (PRE) that were issued to DRAM channel 0 +because there was a page miss. A page miss refers to a situation in which a +page is currently open and another page from the same bank needs to be +opened. The new page experiences a page miss. Closing of the old page is +done by issuing a precharge. +.It Li DRAM_PAGE_MISS.CH1 +.Pq Event 62H , Umask 02H +Counts the number of precharges (PRE) that were issued to DRAM channel 1 +because there was a page miss. A page miss refers to a situation in which a +page is currently open and another page from the same bank needs to be +opened. The new page experiences a page miss. Closing of the old page is +done by issuing a precharge. +.It Li DRAM_PAGE_MISS.CH2 +.Pq Event 62H , Umask 04H +Counts the number of precharges (PRE) that were issued to DRAM channel 2 +because there was a page miss. A page miss refers to a situation in which a +page is currently open and another page from the same bank needs to be +opened. The new page experiences a page miss. Closing of the old page is +done by issuing a precharge. +.It Li DRAM_READ_CAS.CH0 +.Pq Event 63H , Umask 01H +Counts the number of times a read CAS command was issued on DRAM channel 0. +.It Li DRAM_READ_CAS.AUTOPRE_CH0 +.Pq Event 63H , Umask 02H +Counts the number of times a read CAS command was issued on DRAM channel 0 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_READ_CAS.CH1 +.Pq Event 63H , Umask 04H +Counts the number of times a read CAS command was issued on DRAM channel 1. +.It Li DRAM_READ_CAS.AUTOPRE_CH1 +.Pq Event 63H , Umask 08H +Counts the number of times a read CAS command was issued on DRAM channel 1 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_READ_CAS.CH2 +.Pq Event 63H , Umask 10H +Counts the number of times a read CAS command was issued on DRAM channel 2. +.It Li DRAM_READ_CAS.AUTOPRE_CH2 +.Pq Event 63H , Umask 20H +Counts the number of times a read CAS command was issued on DRAM channel 2 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_WRITE_CAS.CH0 +.Pq Event 64H , Umask 01H +Counts the number of times a write CAS command was issued on DRAM channel 0. +.It Li DRAM_WRITE_CAS.AUTOPRE_CH0 +.Pq Event 64H , Umask 02H +Counts the number of times a write CAS command was issued on DRAM channel 0 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_WRITE_CAS.CH1 +.Pq Event 64H , Umask 04H +Counts the number of times a write CAS command was issued on DRAM channel 1. +.It Li DRAM_WRITE_CAS.AUTOPRE_CH1 +.Pq Event 64H , Umask 08H +Counts the number of times a write CAS command was issued on DRAM channel 1 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_WRITE_CAS.CH2 +.Pq Event 64H , Umask 10H +Counts the number of times a write CAS command was issued on DRAM channel 2. 
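+.Pp
+The read and write CAS counts above combine into a simple view of DRAM
+channel traffic.
+The following sketch reads one second of channel 0 activity with the
+.Xr pmc 3
+API; error handling is omitted, and the literal event-name strings and
+the choice of CPU 0 are illustrative assumptions (uncore PMCs count in
+system scope for the package containing the chosen CPU):
+.Bd -literal -offset indent
+#include <pmc.h>
+#include <unistd.h>
+
+pmc_id_t rd, wr;
+pmc_value_t nrd, nwr;
+
+pmc_init();
+pmc_allocate("DRAM_READ_CAS.CH0", PMC_MODE_SC, 0, 0, &rd);
+pmc_allocate("DRAM_WRITE_CAS.CH0", PMC_MODE_SC, 0, 0, &wr);
+pmc_start(rd);
+pmc_start(wr);
+sleep(1);	/* sample one second of traffic */
+pmc_read(rd, &nrd);
+pmc_read(wr, &nwr);
+pmc_stop(rd);
+pmc_stop(wr);
+pmc_release(rd);
+pmc_release(wr);
+.Ed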
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2 +.Pq Event 64H , Umask 20H +Counts the number of times a write CAS command was issued on DRAM channel 2 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_REFRESH.CH0 +.Pq Event 65H , Umask 01H +Counts number of DRAM channel 0 refresh commands. DRAM loses data content +over time. In order to keep correct data content, the data values have to be +refreshed periodically. +.It Li DRAM_REFRESH.CH1 +.Pq Event 65H , Umask 02H +Counts number of DRAM channel 1 refresh commands. DRAM loses data content +over time. In order to keep correct data content, the data values have to be +refreshed periodically. +.It Li DRAM_REFRESH.CH2 +.Pq Event 65H , Umask 04H +Counts number of DRAM channel 2 refresh commands. DRAM loses data content +over time. In order to keep correct data content, the data values have to be +refreshed periodically. +.It Li DRAM_PRE_ALL.CH0 +.Pq Event 66H , Umask 01H +Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close +all open pages in a rank. PREALL is issued when the DRAM needs to be +refreshed or needs to go into a power down mode. +.It Li DRAM_PRE_ALL.CH1 +.Pq Event 66H , Umask 02H +Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close +all open pages in a rank. PREALL is issued when the DRAM needs to be +refreshed or needs to go into a power down mode. +.It Li DRAM_PRE_ALL.CH2 +.Pq Event 66H , Umask 04H +Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close +all open pages in a rank. PREALL is issued when the DRAM needs to be +refreshed or needs to go into a power down mode. +.El +.Sh SEE ALSO +.Xr pmc 3 , +.Xr pmc.atom 3 , +.Xr pmc.core 3 , +.Xr pmc.iaf 3 , +.Xr pmc.ucf 3 , +.Xr pmc.k7 3 , +.Xr pmc.k8 3 , +.Xr pmc.p4 3 , +.Xr pmc.p5 3 , +.Xr pmc.p6 3 , +.Xr pmc.corei7 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , +.Xr pmc.tsc 3 , +.Xr pmc_cpuinfo 3 , +.Xr pmclog 3 , +.Xr hwpmc 4 +.Sh HISTORY +The +.Nm pmc +library first appeared in +.Fx 6.0 . +.Sh AUTHORS +The +.Lb libpmc +library was written by +.An "Joseph Koshy" +.Aq jkoshy@FreeBSD.org . diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3 new file mode 100644 index 0000000..c8f2468 --- /dev/null +++ b/lib/libpmc/pmc.ucf.3 @@ -0,0 +1,115 @@ +.\" Copyright (c) 2010 Fabien Thomas. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" This software is provided by Joseph Koshy ``as is'' and +.\" any express or implied warranties, including, but not limited to, the +.\" implied warranties of merchantability and fitness for a particular purpose +.\" are disclaimed. 
in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 30, 2010
+.Dt PMC.UCF 3
+.Os
+.Sh NAME
+.Nm pmc.ucf
+.Nd measurement events for
+.Tn Intel
+uncore fixed function performance counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Each fixed-function PMC measures a specific hardware event.
+The number of fixed-function PMCs implemented in a CPU can vary.
+The number of fixed-function PMCs present can be determined at runtime
+by using the function
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel uncore fixed-function PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Pp
+.Ss PMC Capabilities
+Fixed-function PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta \&No
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Class Name Prefix
+These PMCs are named using a class name prefix of
+.Dq Li ucf- .
+.Ss Event Specifiers (Fixed Function PMCs)
+The fixed function PMCs are selectable using the following
+event names:
+.Bl -tag -width indent
+.It Li UCLOCK
+.Pq Fixed Function Counter 0
+The fixed-function uncore counter increments at the rate of the U-clock.
+The frequency of the uncore clock domain can be determined from the uncore
+clock ratio which is available in the PCI configuration space register at
+offset C0H under device number 0 and Function 0.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
+
+
diff --git a/lib/libpmc/pmc.westmere.3 b/lib/libpmc/pmc.westmere.3
new file mode 100644
index 0000000..0b87e50
--- /dev/null
+++ b/lib/libpmc/pmc.westmere.3
@@ -0,0 +1,1329 @@
+.\" Copyright (c) 2010 Fabien Thomas. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. 
Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed. in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Dt PMC.WESTMERE 3
+.Os
+.Sh NAME
+.Nm pmc.westmere
+.Nd measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss WESTMERE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO +Counts the number of RFO requests generated by L2 prefetchers. +.It Li PF_IFETCH +Counts the number of code reads generated by L2 prefetchers. +.It Li OTHER +Counts one of the following transaction types, including L3 invalidate, +I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, +lock, unlock, split lock. +.It Li UNCORE_HIT +L3 Hit: local or remote home requests that hit L3 cache in the uncore +with no coherency actions required (snooping). +.It Li OTHER_CORE_HIT_SNP +L3 Hit: local or remote home requests that hit L3 cache in the uncore +and was serviced by another core with a cross core snoop where no modified +copies were found (clean). +.It Li OTHER_CORE_HITM +L3 Hit: local or remote home requests that hit L3 cache in the uncore +and was serviced by another core with a cross core snoop where modified +copies were found (HITM). +.It Li REMOTE_CACHE_FWD +L3 Miss: local homed requests that missed the L3 cache and was serviced +by forwarded data following a cross package snoop where no modified +copies found. (Remote home requests are not counted) +.It Li REMOTE_DRAM +L3 Miss: remote home requests that missed the L3 cache and were serviced +by remote DRAM. +.It Li LOCAL_DRAM +L3 Miss: local home requests that missed the L3 cache and were serviced +by local DRAM. +.It Li NON_DRAM +Non-DRAM requests that were serviced by IOH. +.El +.It Li cmask= Ns Ar value +Configure the PMC to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the PMC to count the number of de-asserted to asserted +transitions of the conditions expressed by the other qualifiers. +If specified, the counter will increment only once whenever a +condition becomes true, irrespective of the number of clocks during +which the condition remains true. +.It Li inv +Invert the sense of comparison when the +.Dq Li cmask +qualifier is present, making the counter increment when the number of +events per cycle is less than the value specified by the +.Dq Li cmask +qualifier. +.It Li os +Configure the PMC to count events happening at processor privilege +level 0. +.It Li usr +Configure the PMC to count events occurring at privilege levels 1, 2 +or 3. +.El +.Pp +If neither of the +.Dq Li os +or +.Dq Li usr +qualifiers are specified, the default is to enable both. +.Ss Event Specifiers (Programmable PMCs) +Westmere programmable PMCs support the following events: +.Bl -tag -width indent +.It Li LOAD_BLOCK.OVERLAP_STORE +.Pq Event 03H , Umask 02H +Loads that partially overlap an earlier store +.It Li SB_DRAIN.ANY +.Pq Event 04H , Umask 07H +All Store buffer stall cycles +.It Li MISALIGN_MEMORY.STORE +.Pq Event 05H , Umask 02H +All store referenced with misaligned address +.It Li STORE_BLOCKS.AT_RET +.Pq Event 06H , Umask 04H +Counts number of loads delayed with at-Retirement block code. The following +loads need to be executed at retirement and wait for all senior stores on +the same thread to be drained: load splitting across 4K boundary (page +split), load accessing uncacheable (UC or USWC) memory, load lock, and load +with page table in UC or USWC memory region. 
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.WALK_CYCLES
+.Pq Event 08H , Umask 04H
+Cycles PMH is busy with a page walk due to a load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible load
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not count
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops were issued by the Register Allocation
+Table to the Reservation Station, i.e. the UOPs issued from the front end to
+the back end.
+set invert=1, cmask=1
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.LOCAL_HITM
+.Pq Event 0FH , Umask 02H
+Load instructions retired that HIT modified data in sibling core (Precise
+Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT
+.Pq Event 0FH , Umask 08H
+Load instructions retired local dram and remote cache HIT data sources
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 10H
+Load instructions retired with a data source of local DRAM or locally homed
+remote cache HITM (Precise Event)
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 20H
+Load instructions retired remote DRAM and remote home-remote cache HITM
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Load instructions retired I/O (Precise Event)
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
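+.Pp
+As an illustration of the event qualifier syntax described earlier, the
+.Li UOPS_ISSUED.STALLED_CYCLES
+entry above is
+.Li UOPS_ISSUED.ANY
+with the
+.Dq Li inv
+and
+.Dq Li cmask
+qualifiers applied.
+A minimal process-mode sketch; error handling is omitted and the exact
+specifier string is an assumption built from the qualifier names
+documented in this page:
+.Bd -literal -offset indent
+#include <pmc.h>
+
+pmc_id_t stall;
+pmc_value_t v;
+
+pmc_init();
+/* Count cycles in which this process issued no uops. */
+pmc_allocate("UOPS_ISSUED.ANY,inv,cmask=1", PMC_MODE_TC,
+    0, PMC_CPU_ANY, &stall);
+pmc_attach(stall, 0);	/* attach to the current process */
+pmc_start(stall);
+/* ... code being measured ... */
+pmc_stop(stall);
+pmc_read(stall, &v);
+.Ed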
+.It Li FP_COMP_OPS_EXE.MMX +.Pq Event 10H , Umask 02H +Counts number of MMX Uops executed. +.It Li FP_COMP_OPS_EXE.SSE_FP +.Pq Event 10H , Umask 04H +Counts number of SSE and SSE2 FP uops executed. +.It Li FP_COMP_OPS_EXE.SSE2_INTEGER +.Pq Event 10H , Umask 08H +Counts number of SSE2 integer uops executed. +.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED +.Pq Event 10H , Umask 10H +Counts number of SSE FP packed uops executed. +.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR +.Pq Event 10H , Umask 20H +Counts number of SSE FP scalar uops executed. +.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION +.Pq Event 10H , Umask 40H +Counts number of SSE* FP single precision uops executed. +.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION +.Pq Event 10H , Umask 80H +Counts number of SSE* FP double precision uops executed. +.It Li SIMD_INT_128.PACKED_MPY +.Pq Event 12H , Umask 01H +Counts number of 128 bit SIMD integer multiply operations. +.It Li SIMD_INT_128.PACKED_SHIFT +.Pq Event 12H , Umask 02H +Counts number of 128 bit SIMD integer shift operations. +.It Li SIMD_INT_128.PACK +.Pq Event 12H , Umask 04H +Counts number of 128 bit SIMD integer pack operations. +.It Li SIMD_INT_128.UNPACK +.Pq Event 12H , Umask 08H +Counts number of 128 bit SIMD integer unpack operations. +.It Li SIMD_INT_128.PACKED_LOGICAL +.Pq Event 12H , Umask 10H +Counts number of 128 bit SIMD integer logical operations. +.It Li SIMD_INT_128.PACKED_ARITH +.Pq Event 12H , Umask 20H +Counts number of 128 bit SIMD integer arithmetic operations. +.It Li SIMD_INT_128.SHUFFLE_MOVE +.Pq Event 12H , Umask 40H +Counts number of 128 bit SIMD integer shuffle and move operations. +.It Li LOAD_DISPATCH.RS +.Pq Event 13H , Umask 01H +Counts number of loads dispatched from the Reservation Station that bypass +the Memory Order Buffer. +.It Li LOAD_DISPATCH.RS_DELAYED +.Pq Event 13H , Umask 02H +Counts the number of delayed RS dispatches at the stage latch. If an RS +dispatch can not bypass to LB, it has another chance to dispatch from the +one-cycle delayed staging latch before it is written into the LB. +.It Li LOAD_DISPATCH.MOB +.Pq Event 13H , Umask 04H +Counts the number of loads dispatched from the Reservation Station to the +Memory Order Buffer. +.It Li LOAD_DISPATCH.ANY +.Pq Event 13H , Umask 07H +Counts all loads dispatched from the Reservation Station. +.It Li ARITH.CYCLES_DIV_BUSY +.Pq Event 14H , Umask 01H +Counts the number of cycles the divider is busy executing divide or square +root operations. The divide can be integer, X87 or Streaming SIMD Extensions +(SSE). The square root operation can be either X87 or SSE. +Set 'edge =1, invert=1, cmask=1' to count the number of divides. +Count may be incorrect When SMT is on +.It Li ARITH.MUL +.Pq Event 14H , Umask 02H +Counts the number of multiply operations executed. This includes integer as +well as floating point multiply operations but excludes DPPS mul and MPSAD. +Count may be incorrect When SMT is on +.It Li INST_QUEUE_WRITES +.Pq Event 17H , Umask 01H +Counts the number of instructions written into the instruction queue every +cycle. +.It Li INST_DECODED.DEC0 +.Pq Event 18H , Umask 01H +Counts number of instructions that require decoder 0 to be decoded. Usually, +this means that the instruction maps to more than 1 uop +.It Li TWO_UOP_INSTS_DECODED +.Pq Event 19H , Umask 01H +An instruction that generates two uops was decoded +.It Li INST_QUEUE_WRITE_CYCLES +.Pq Event 1EH , Umask 01H +This event counts the number of cycles during which instructions are written +to the instruction queue. 
Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed.
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Number of loops that can not stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non-rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
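+.Pp
+The
+.Li L2_RQSTS.MISS
+and
+.Li L2_RQSTS.REFERENCES
+umasks above combine naturally into an overall L2 miss ratio, a derived
+metric rather than a hardware event:
+.Bd -literal -offset indent
+L2 miss ratio = L2_RQSTS.MISS / L2_RQSTS.REFERENCES
+.Ed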
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e., a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request.
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S (shared) state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 02H
+Counts uncore Last Level Cache references. Because the cache hierarchy,
+cache sizes and other characteristics are implementation-specific, value
+comparisons to estimate performance differences are not recommended.
+see Table A-1
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 01H
+Counts uncore Last Level Cache misses. Because the cache hierarchy, cache
+sizes and other characteristics are implementation-specific, value
+comparisons to estimate performance differences are not recommended.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which cause a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.WALK_CYCLES
+.Pq Event 49H , Umask 04H
+Counts cycles of page walk due to misses in the STLB.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D: a streamer, which predicts that lines sequentially
+following the current one should be fetched, and the IP prefetcher, which
+remembers access patterns for the current instruction. The streamer
+prefetcher stops on an L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS +.Pq Event 4EH , Umask 04H +Counts number of prefetch requests triggered by the Finite State Machine and +pushed into the prefetch FIFO. Some of the prefetch requests are dropped due +to overwrites or competition between the IP index prefetcher and streamer +prefetcher. The prefetch FIFO contains 4 entries. +.It Li EPT.WALK_CYCLES +.Pq Event 4FH , Umask 10H +Counts Extended Page walk cycles. +.It Li L1D.REPL +.Pq Event 51H , Umask 01H +Counts the number of lines brought into the L1 data cache. +Counter 0, 1 only +.It Li L1D.M_REPL +.Pq Event 51H , Umask 02H +Counts the number of modified lines brought into the L1 data cache. +Counter 0, 1 only +.It Li L1D.M_EVICT +.Pq Event 51H , Umask 04H +Counts the number of modified lines evicted from the L1 data cache due to +replacement. +Counter 0, 1 only +.It Li L1D.M_SNOOP_EVICT +.Pq Event 51H , Umask 08H +Counts the number of modified lines evicted from the L1 data cache due to +snoop HITM intervention. +Counter 0, 1 only +.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT +.Pq Event 52H , Umask 01H +Counts the number of cacheable load lock speculated instructions accepted +into the fill buffer. +.It Li L1D_CACHE_LOCK_FB_HIT +.Pq Event 53H , Umask 01H +Counts the number of cacheable load lock speculated or retired instructions +accepted into the fill buffer. +.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA +.Pq Event 60H , Umask 01H +Counts weighted cycles of offcore demand data read requests. Does not +include L2 prefetch requests. +counter 0 +.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE +.Pq Event 60H , Umask 02H +Counts weighted cycles of offcore demand code read requests. Does not +include L2 prefetch requests. +counter 0 +.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO +.Pq Event 60H , Umask 04H +Counts weighted cycles of offcore demand RFO requests. Does not include L2 +prefetch requests. +counter 0 +.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ +.Pq Event 60H , Umask 08H +Counts weighted cycles of offcore read requests of any kind. Include L2 +prefetch requests. +counter 0 +.It Li CACHE_LOCK_CYCLES.L1D_L2 +.Pq Event 63H , Umask 01H +Cycle count during which the L1D and L2 are locked. A lock is asserted when +there is a locked memory access, due to uncacheable memory, a locked +operation that spans two cache lines, or a page walk from an uncacheable +page table. +Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and +it is highly recommended to avoid such accesses. +.It Li CACHE_LOCK_CYCLES.L1D +.Pq Event 63H , Umask 02H +Counts the number of cycles that cacheline in the L1 data cache unit is +locked. +Counter 0, 1 only. +.It Li IO_TRANSACTIONS +.Pq Event 6CH , Umask 01H +Counts the number of completed I/O transactions. +.It Li L1I.HITS +.Pq Event 80H , Umask 01H +Counts all instruction fetches that hit the L1 instruction cache. +.It Li L1I.MISSES +.Pq Event 80H , Umask 02H +Counts all instruction fetches that miss the L1I cache. This includes +instruction cache misses, streaming buffer misses, victim cache misses and +uncacheable fetches. An instruction fetch miss is counted only once and not +once for every cycle it is outstanding. +.It Li L1I.READS +.Pq Event 80H , Umask 03H +Counts all instruction fetches, including uncacheable fetches that bypass +the L1I. +.It Li L1I.CYCLES_STALLED +.Pq Event 80H , Umask 04H +Cycle counts for which an instruction fetch stalls due to a L1I cache miss, +ITLB miss or ITLB fault. 
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which cause a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non-call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non-call
+branches, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However, frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non-call near branches executed, but not necessarily
+retired.
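+.Pp
+The executed-branch and mispredicted-branch events pair naturally; for
+example, an overall misprediction ratio for a command can be gathered
+from
+.Li BR_INST_EXEC.ANY
+and
+.Li BR_MISP_EXEC.ANY
+(below) with
+.Xr pmcstat 8 ,
+whose arguments are plain event specifiers as documented in this page:
+.Bd -literal -offset indent
+pmcstat -p BR_INST_EXEC.ANY -p BR_MISP_EXEC.ANY ./a.out
+.Ed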
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a near return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls executed, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries and memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed, and stalls arising while the store buffer is
+draining after synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire).
+When RS is full, new instructions cannot enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline (i.e., all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re-order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring too close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. 
The IQ
+is also responsible for providing conditional branch prediction direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by the loop stream detector.
+Use cmask=1 and invert to count cycles.
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that were issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts number of cycles there are one or more uops being executed that were
+issued on ports 0-4. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that were issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts number of cycles there are one or more uops being executed on any
+ports. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that were issued on port 0, 1, or 5. 
+use cmask=1, invert=1 to count stall cycles
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that were issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS.CODE
+.Pq Event B4H , Umask 01H
+Counts the number of snoop code requests.
+.It Li SNOOPQ_REQUESTS.DATA
+.Pq Event B4H , Umask 02H
+Counts the number of snoop data requests.
+.It Li SNOOPQ_REQUESTS.INVALIDATE
+.Pq Event B4H , Umask 04H
+Counts the number of snoop invalidate requests.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A6H.
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Use MSR 01A7H.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of retired MMX instructions.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired (macro-fused=1, micro-fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles.
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle.
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
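+.Pp
+.Li INST_RETIRED.ANY_P
+above, paired with
+.Li CPU_CLK_UNHALTED.THREAD_P ,
+yields the classic cycles-per-instruction metric, a derived ratio
+rather than a hardware event:
+.Bd -literal -offset indent
+CPI = CPU_CLK_UNHALTED.THREAD_P / INST_RETIRED.ANY_P
+.Ed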
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a severe penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3 caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H , Umask 01H
+Counts mispredicted conditional retired calls.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 04H
+Counts all mispredicted retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD scalar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean and modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction. 
You can use this event to estimate the penalties
+for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.STALL_CYCLES
+.Pq Event D1H , Umask 01H
+Counts the cycles of decoder stalls.
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push, pop, call,
+ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by a previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe; cycles when partial register stalls occurred; cycles when
+flag stalls occurred; and cycles when floating-point unit (FPU) status word
+stalls occurred. 
To count each of these conditions separately use the events: +RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and +RAT_STALLS.FPSW. +.It Li SEG_RENAME_STALLS +.Pq Event D4H , Umask 01H +Counts the number of stall cycles due to the lack of renaming resources for +the ES, DS, FS, and GS segment registers. If a segment is renamed but not +retired and a second update to the same segment occurs, a stall occurs in +the front-end of the pipeline until the renamed segment retires. +.It Li ES_REG_RENAMES +.Pq Event D5H , Umask 01H +Counts the number of times the ES segment register is renamed. +.It Li UOP_UNFUSION +.Pq Event DBH , Umask 01H +Counts unfusion events due to a floating point exception to a fused uop. +.It Li BR_INST_DECODED +.Pq Event E0H , Umask 01H +Counts the number of branch instructions decoded. +.It Li BPU_MISSED_CALL_RET +.Pq Event E5H , Umask 01H +Counts number of times the Branch Prediction Unit missed predicting a call +or return branch. +.It Li BACLEAR.CLEAR +.Pq Event E6H , Umask 01H +Counts the number of times the front end is resteered, mainly when the +Branch Prediction Unit cannot provide a correct prediction and this is +corrected by the Branch Address Calculator at the front end. This can occur +if the code has many branches such that they cannot be consumed by the BPU. +Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble +in the instruction fetch pipeline. The effect on total execution time +depends on the surrounding code. +.It Li BACLEAR.BAD_TARGET +.Pq Event E6H , Umask 02H +Counts number of Branch Address Calculator clears (BACLEAR) asserted due to +conditional branch instructions in which there was a target hit but the +direction was wrong. Each BACLEAR asserted by the BAC generates +approximately an 8 cycle bubble in the instruction fetch pipeline. +.It Li BPU_CLEARS.EARLY +.Pq Event E8H , Umask 01H +Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken +branch after incorrectly assuming that it was not taken. +The BPU clear leads to a 2 cycle bubble in the Front End. +.It Li BPU_CLEARS.LATE +.Pq Event E8H , Umask 02H +Counts late Branch Prediction Unit clears due to Most Recently Used +conflicts. The BPU clear leads to a 3 cycle bubble in the Front End. +.It Li THREAD_ACTIVE +.Pq Event ECH , Umask 01H +Counts cycles threads are active. +.It Li L2_TRANSACTIONS.LOAD +.Pq Event F0H , Umask 01H +Counts L2 load operations due to HW prefetch or demand loads. +.It Li L2_TRANSACTIONS.RFO +.Pq Event F0H , Umask 02H +Counts L2 RFO operations due to HW prefetch or demand RFOs. +.It Li L2_TRANSACTIONS.IFETCH +.Pq Event F0H , Umask 04H +Counts L2 instruction fetch operations due to HW prefetch or demand ifetch. +.It Li L2_TRANSACTIONS.PREFETCH +.Pq Event F0H , Umask 08H +Counts L2 prefetch operations. +.It Li L2_TRANSACTIONS.L1D_WB +.Pq Event F0H , Umask 10H +Counts L1D writeback operations to the L2. +.It Li L2_TRANSACTIONS.FILL +.Pq Event F0H , Umask 20H +Counts L2 cache line fill operations due to load, RFO, L1D writeback or +prefetch. +.It Li L2_TRANSACTIONS.WB +.Pq Event F0H , Umask 40H +Counts L2 writeback operations to the L3. +.It Li L2_TRANSACTIONS.ANY +.Pq Event F0H , Umask 80H +Counts all L2 cache operations. +.It Li L2_LINES_IN.S_STATE +.Pq Event F1H , Umask 02H +Counts the number of cache lines allocated in the L2 cache in the S (shared) +state. +.It Li L2_LINES_IN.E_STATE +.Pq Event F1H , Umask 04H +Counts the number of cache lines allocated in the L2 cache in the E +(exclusive) state.
+.It Li L2_LINES_IN.ANY +.Pq Event F1H , Umask 07H +Counts the number of cache lines allocated in the L2 cache. +.It Li L2_LINES_OUT.DEMAND_CLEAN +.Pq Event F2H , Umask 01H +Counts L2 clean cache lines evicted by a demand request. +.It Li L2_LINES_OUT.DEMAND_DIRTY +.Pq Event F2H , Umask 02H +Counts L2 dirty (modified) cache lines evicted by a demand request. +.It Li L2_LINES_OUT.PREFETCH_CLEAN +.Pq Event F2H , Umask 04H +Counts L2 clean cache lines evicted by a prefetch request. +.It Li L2_LINES_OUT.PREFETCH_DIRTY +.Pq Event F2H , Umask 08H +Counts L2 modified cache lines evicted by a prefetch request. +.It Li L2_LINES_OUT.ANY +.Pq Event F2H , Umask 0FH +Counts all L2 cache lines evicted for any reason. +.It Li SQ_MISC.LRU_HINTS +.Pq Event F4H , Umask 04H +Counts number of Super Queue LRU hints sent to L3. +.It Li SQ_MISC.SPLIT_LOCK +.Pq Event F4H , Umask 10H +Counts the number of SQ lock splits across a cache line. +.It Li SQ_FULL_STALL_CYCLES +.Pq Event F6H , Umask 01H +Counts cycles the Super Queue is full. Neither of the threads on this core +will be able to access the uncore. +.It Li FP_ASSIST.ALL +.Pq Event F7H , Umask 01H +Counts the number of floating point operations executed that required +micro-code assist intervention. Assists are required in the following cases: +SSE instructions (denormal input when the DAZ flag is off, or underflow +result when the FTZ flag is off); x87 instructions (NaN or denormal loaded +to a register or used as input from memory, division by 0, or underflow +output). +.It Li FP_ASSIST.OUTPUT +.Pq Event F7H , Umask 02H +Counts number of floating point micro-code assists when the output value +(destination register) is invalid. +.It Li FP_ASSIST.INPUT +.Pq Event F7H , Umask 04H +Counts number of floating point micro-code assists when the input value (one +of the source operands to an FP instruction) is invalid. +.It Li SIMD_INT_64.PACKED_MPY +.Pq Event FDH , Umask 01H +Counts number of SIMD integer 64 bit packed multiply operations. +.It Li SIMD_INT_64.PACKED_SHIFT +.Pq Event FDH , Umask 02H +Counts number of SIMD integer 64 bit packed shift operations. +.It Li SIMD_INT_64.PACK +.Pq Event FDH , Umask 04H +Counts number of SIMD integer 64 bit pack operations. +.It Li SIMD_INT_64.UNPACK +.Pq Event FDH , Umask 08H +Counts number of SIMD integer 64 bit unpack operations. +.It Li SIMD_INT_64.PACKED_LOGICAL +.Pq Event FDH , Umask 10H +Counts number of SIMD integer 64 bit logical operations. +.It Li SIMD_INT_64.PACKED_ARITH +.Pq Event FDH , Umask 20H +Counts number of SIMD integer 64 bit arithmetic operations. +.It Li SIMD_INT_64.SHUFFLE_MOVE +.Pq Event FDH , Umask 40H +Counts number of SIMD integer 64 bit shuffle or move operations. +.El +.Sh SEE ALSO +.Xr pmc 3 , +.Xr pmc.atom 3 , +.Xr pmc.core 3 , +.Xr pmc.iaf 3 , +.Xr pmc.ucf 3 , +.Xr pmc.k7 3 , +.Xr pmc.k8 3 , +.Xr pmc.p4 3 , +.Xr pmc.p5 3 , +.Xr pmc.p6 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , +.Xr pmc.westmereuc 3 , +.Xr pmc.tsc 3 , +.Xr pmc_cpuinfo 3 , +.Xr pmclog 3 , +.Xr hwpmc 4 +.Sh HISTORY +The +.Nm pmc +library first appeared in +.Fx 6.0 . +.Sh AUTHORS +The +.Lb libpmc +library was written by +.An "Joseph Koshy" +.Aq jkoshy@FreeBSD.org . diff --git a/lib/libpmc/pmc.westmereuc.3 b/lib/libpmc/pmc.westmereuc.3 new file mode 100644 index 0000000..c5fd12d --- /dev/null +++ b/lib/libpmc/pmc.westmereuc.3 @@ -0,0 +1,1083 @@ +.\" Copyright (c) 2010 Fabien Thomas. All rights reserved.
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" This software is provided by Joseph Koshy ``as is'' and +.\" any express or implied warranties, including, but not limited to, the +.\" implied warranties of merchantability and fitness for a particular purpose +.\" are disclaimed. in no event shall Joseph Koshy be liable +.\" for any direct, indirect, incidental, special, exemplary, or consequential +.\" damages (including, but not limited to, procurement of substitute goods +.\" or services; loss of use, data, or profits; or business interruption) +.\" however caused and on any theory of liability, whether in contract, strict +.\" liability, or tort (including negligence or otherwise) arising in any way +.\" out of the use of this software, even if advised of the possibility of +.\" such damage. +.\" +.\" $FreeBSD$ +.\" +.Dd March 24, 2010 +.Os +.Dt PMC.WESTMEREUC 3 +.Sh NAME +.Nm pmc.westmere +.Nd uncore measurement events for +.Tn Intel +.Tn Westmere +family CPUs +.Sh LIBRARY +.Lb libpmc +.Sh SYNOPSIS +.In pmc.h +.Sh DESCRIPTION +.Tn Intel +.Tn "Westmere" +CPUs contain PMCs conforming to version 2 of the +.Tn Intel +performance measurement architecture. +These CPUs contain two classes of PMCs: +.Bl -tag -width "Li PMC_CLASS_UCP" +.It Li PMC_CLASS_UCF +Fixed-function counters that count only one hardware event per counter. +.It Li PMC_CLASS_UCP +Programmable counters that may be configured to count one of a defined +set of hardware events. +.El +.Pp +The number of PMCs available in each class and their widths need to be +determined at run time by calling +.Xr pmc_cpuinfo 3 . +.Pp +Intel Westmere PMCs are documented in +.Rs +.%B "Intel(R) 64 and IA-32 Architectures Software Developes Manual" +.%T "Volume 3B: System Programming Guide, Part 2" +.%N "Order Number: 253669-033US" +.%D December 2009 +.%Q "Intel Corporation" +.Re +.Ss WESTMERE UNCORE FIXED FUNCTION PMCS +These PMCs and their supported events are documented in +.Xr pmc.ucf 3 . +Not all CPUs in this family implement fixed-function counters. +.Ss WESTMERE UNCORE PROGRAMMABLE PMCS +The programmable PMCs support the following capabilities: +.Bl -column "PMC_CAP_INTERRUPT" "Support" +.It Em Capability Ta Em Support +.It PMC_CAP_CASCADE Ta \&No +.It PMC_CAP_EDGE Ta Yes +.It PMC_CAP_INTERRUPT Ta \&No +.It PMC_CAP_INVERT Ta Yes +.It PMC_CAP_READ Ta Yes +.It PMC_CAP_PRECISE Ta \&No +.It PMC_CAP_SYSTEM Ta \&No +.It PMC_CAP_TAGGING Ta \&No +.It PMC_CAP_THRESHOLD Ta Yes +.It PMC_CAP_USER Ta \&No +.It PMC_CAP_WRITE Ta Yes +.El +.Ss Event Qualifiers +Event specifiers for these PMCs support the following common +qualifiers: +.Bl -tag -width indent +.It Li cmask= Ns Ar value +Configure the PMC to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the PMC to count the number of de-asserted to asserted +transitions of the conditions expressed by the other qualifiers. 
+.Pp +Intel Westmere PMCs are documented in +.Rs +.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual" +.%T "Volume 3B: System Programming Guide, Part 2" +.%N "Order Number: 253669-033US" +.%D December 2009 +.%Q "Intel Corporation" +.Re +.Ss WESTMERE UNCORE FIXED FUNCTION PMCS +These PMCs and their supported events are documented in +.Xr pmc.ucf 3 . +Not all CPUs in this family implement fixed-function counters. +.Ss WESTMERE UNCORE PROGRAMMABLE PMCS +The programmable PMCs support the following capabilities: +.Bl -column "PMC_CAP_INTERRUPT" "Support" +.It Em Capability Ta Em Support +.It PMC_CAP_CASCADE Ta \&No +.It PMC_CAP_EDGE Ta Yes +.It PMC_CAP_INTERRUPT Ta \&No +.It PMC_CAP_INVERT Ta Yes +.It PMC_CAP_READ Ta Yes +.It PMC_CAP_PRECISE Ta \&No +.It PMC_CAP_SYSTEM Ta \&No +.It PMC_CAP_TAGGING Ta \&No +.It PMC_CAP_THRESHOLD Ta Yes +.It PMC_CAP_USER Ta \&No +.It PMC_CAP_WRITE Ta Yes +.El +.Ss Event Qualifiers +Event specifiers for these PMCs support the following common +qualifiers: +.Bl -tag -width indent +.It Li cmask= Ns Ar value +Configure the PMC to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the PMC to count the number of de-asserted to asserted +transitions of the conditions expressed by the other qualifiers. +If specified, the counter will increment only once whenever a +condition becomes true, irrespective of the number of clocks during +which the condition remains true. +.It Li inv +Invert the sense of comparison when the +.Dq Li cmask +qualifier is present, making the counter increment when the number of +events per cycle is less than the value specified by the +.Dq Li cmask +qualifier. +.El
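+.Pp
+For example, the
+.Dq Li edge
+qualifier turns a level condition into a transition count.
+The following specifier is an illustrative sketch (it does not appear in the
+vendor documentation) that counts the number of times the Global Queue read
+tracker goes from empty to not empty:
+.Bd -literal -offset indent
+GQ_CYCLES_NOT_EMPTY.READ_TRACKER,edge
+.Ed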
+.Ss Event Specifiers (Programmable PMCs) +Westmere uncore programmable PMCs support the following events: +.Bl -tag -width indent +.It Li GQ_CYCLES_FULL.READ_TRACKER +.Pq Event 00H , Umask 01H +Uncore cycles Global Queue read tracker is full. +.It Li GQ_CYCLES_FULL.WRITE_TRACKER +.Pq Event 00H , Umask 02H +Uncore cycles Global Queue write tracker is full. +.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER +.Pq Event 00H , Umask 04H +Uncore cycles Global Queue peer probe tracker is full. The peer probe +tracker queue tracks snoops from the IOH and remote sockets. +.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER +.Pq Event 01H , Umask 01H +Uncore cycles where Global Queue read tracker has at least one valid entry. +.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER +.Pq Event 01H , Umask 02H +Uncore cycles where Global Queue write tracker has at least one valid entry. +.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER +.Pq Event 01H , Umask 04H +Uncore cycles where Global Queue peer probe tracker has at least one valid +entry. The peer probe tracker queue tracks IOH and remote socket snoops. +.It Li GQ_OCCUPANCY.READ_TRACKER +.Pq Event 02H , Umask 01H +Increments the number of queue entries (code read, data read, and RFOs) in +the read tracker. The GQ read tracker allocate to deallocate occupancy +count is divided by the count to obtain the average read tracker latency. +.It Li GQ_ALLOC.READ_TRACKER +.Pq Event 03H , Umask 01H +Counts the number of read tracker allocate to deallocate entries. The GQ +read tracker allocate to deallocate occupancy count is divided by the count +to obtain the average read tracker latency. +.It Li GQ_ALLOC.RT_L3_MISS +.Pq Event 03H , Umask 02H +Counts the number of GQ read tracker entries for which a full cache line read +has missed the L3. The GQ read tracker L3 miss to fill occupancy count is +divided by this count to obtain the average cache line read L3 miss latency. +The latency represents the time after which the L3 has determined that the +cache line has missed. The time between a GQ read tracker allocation and the +L3 determining that the cache line has missed is the average L3 hit latency. +The total L3 cache line read miss latency is the hit latency + L3 miss +latency. +.It Li GQ_ALLOC.RT_TO_L3_RESP +.Pq Event 03H , Umask 04H +Counts the number of GQ read tracker entries that are allocated in the read +tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy +count is divided by this count to obtain the average L3 hit latency. +.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED +.Pq Event 03H , Umask 08H +Counts the number of GQ read tracker entries that are allocated in the read +tracker, have missed in the L3 and have not acquired a Request Transaction +ID. The GQ read tracker L3 miss to RTID acquired occupancy count is +divided by this count to obtain the average latency for a read L3 miss to +acquire an RTID. +.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED +.Pq Event 03H , Umask 10H +Counts the number of GQ write tracker entries that are allocated in the +write tracker, have missed in the L3 and have not acquired a Request +Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is +divided by this count to obtain the average latency for a write L3 miss to +acquire an RTID. +.It Li GQ_ALLOC.WRITE_TRACKER +.Pq Event 03H , Umask 20H +Counts the number of GQ write tracker entries that are allocated in the +write tracker queue that miss the L3. The GQ write tracker occupancy count +is divided by this count to obtain the average L3 write miss latency. +.It Li GQ_ALLOC.PEER_PROBE_TRACKER +.Pq Event 03H , Umask 40H +Counts the number of GQ peer probe tracker (snoop) entries that are +allocated in the peer probe tracker queue that miss the L3. The GQ peer +probe occupancy count is divided by this count to obtain the average L3 peer +probe miss latency. +.It Li GQ_DATA.FROM_QPI +.Pq Event 04H , Umask 01H +Cycles Global Queue Quickpath Interface input data port is busy importing +data from the Quickpath Interface. Each cycle the input port can transfer 8 +or 16 bytes of data. +.It Li GQ_DATA.FROM_QMC +.Pq Event 04H , Umask 02H +Cycles Global Queue Quickpath Memory Interface input data port is busy +importing data from the Quickpath Memory Interface. Each cycle the input +port can transfer 8 or 16 bytes of data. +.It Li GQ_DATA.FROM_L3 +.Pq Event 04H , Umask 04H +Cycles GQ L3 input data port is busy importing data from the Last Level +Cache. Each cycle the input port can transfer 32 bytes of data. +.It Li GQ_DATA.FROM_CORES_02 +.Pq Event 04H , Umask 08H +Cycles GQ Core 0 and 2 input data port is busy importing data from processor +cores 0 and 2. Each cycle the input port can transfer 32 bytes of data. +.It Li GQ_DATA.FROM_CORES_13 +.Pq Event 04H , Umask 10H +Cycles GQ Core 1 and 3 input data port is busy importing data from processor +cores 1 and 3. Each cycle the input port can transfer 32 bytes of data. +.It Li GQ_DATA.TO_QPI_QMC +.Pq Event 05H , Umask 01H +Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath +Interface or Quickpath Memory Interface. Each cycle the output port can +transfer 32 bytes of data. +.It Li GQ_DATA.TO_L3 +.Pq Event 05H , Umask 02H +Cycles GQ L3 output data port is busy sending data to the Last Level Cache. +Each cycle the output port can transfer 32 bytes of data. +.It Li GQ_DATA.TO_CORES +.Pq Event 05H , Umask 04H +Cycles GQ Core output data port is busy sending data to the Cores. Each +cycle the output port can transfer 32 bytes of data. +.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE +.Pq Event 06H , Umask 01H +Number of snoop responses to the local home that L3 does not have the +referenced cache line. +.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE +.Pq Event 06H , Umask 02H +Number of snoop responses to the local home that L3 has the referenced line +cached in the S state. +.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE +.Pq Event 06H , Umask 04H +Number of responses to code or data read snoops to the local home that the +L3 has the referenced cache line in the E state. The L3 cache line state is +changed to the S state and the line is forwarded to the local home in the S +state. +.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE +.Pq Event 06H , Umask 08H +Number of responses to read invalidate snoops to the local home that the L3 +has the referenced cache line in the M state. The L3 cache line state is +invalidated and the line is forwarded to the local home in the M state. +.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT +.Pq Event 06H , Umask 10H +Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB +.Pq Event 06H , Umask 20H +Number of responses to code or data read snoops to the local home that the +L3 has the referenced line cached in the M state. +.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE +.Pq Event 07H , Umask 01H +Number of snoop responses to a remote home that L3 does not have the +referenced cache line. +.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE +.Pq Event 07H , Umask 02H +Number of snoop responses to a remote home that L3 has the referenced line +cached in the S state. +.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE +.Pq Event 07H , Umask 04H +Number of responses to code or data read snoops to a remote home that the L3 +has the referenced cache line in the E state. The L3 cache line state is +changed to the S state and the line is forwarded to the remote home in the S +state. +.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE +.Pq Event 07H , Umask 08H +Number of responses to read invalidate snoops to a remote home that the L3 +has the referenced cache line in the M state. The L3 cache line state is +invalidated and the line is forwarded to the remote home in the M state. +.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT +.Pq Event 07H , Umask 10H +Number of conflict snoop responses sent to the remote home. +.It Li SNP_RESP_TO_REMOTE_HOME.WB +.Pq Event 07H , Umask 20H +Number of responses to code or data read snoops to a remote home that the L3 +has the referenced line cached in the M state. +.It Li SNP_RESP_TO_REMOTE_HOME.HITM +.Pq Event 07H , Umask 24H +Number of HITM snoop responses to a remote home. +.It Li L3_HITS.READ +.Pq Event 08H , Umask 01H +Number of code read, data read and RFO requests that hit in the L3. +.It Li L3_HITS.WRITE +.Pq Event 08H , Umask 02H +Number of writeback requests that hit in the L3. Writebacks from the cores +will always result in L3 hits due to the inclusive property of the L3. +.It Li L3_HITS.PROBE +.Pq Event 08H , Umask 04H +Number of snoops from IOH or remote sockets that hit in the L3. +.It Li L3_HITS.ANY +.Pq Event 08H , Umask 03H +Number of reads and writes that hit the L3. +.It Li L3_MISS.READ +.Pq Event 09H , Umask 01H +Number of code read, data read and RFO requests that miss the L3. +.It Li L3_MISS.WRITE +.Pq Event 09H , Umask 02H +Number of writeback requests that miss the L3. Should always be zero as +writebacks from the cores will always result in L3 hits due to the inclusive +property of the L3. +.It Li L3_MISS.PROBE +.Pq Event 09H , Umask 04H +Number of snoops from IOH or remote sockets that miss the L3. +.It Li L3_MISS.ANY +.Pq Event 09H , Umask 03H +Number of reads and writes that miss the L3. +.It Li L3_LINES_IN.M_STATE +.Pq Event 0AH , Umask 01H +Counts the number of L3 lines allocated in M state. The only time a cache +line is allocated in the M state is when the line was forwarded in the M +state due to a Snoop Read Invalidate Own request. +.It Li L3_LINES_IN.E_STATE +.Pq Event 0AH , Umask 02H +Counts the number of L3 lines allocated in E state. +.It Li L3_LINES_IN.S_STATE +.Pq Event 0AH , Umask 04H +Counts the number of L3 lines allocated in S state. +.It Li L3_LINES_IN.F_STATE +.Pq Event 0AH , Umask 08H +Counts the number of L3 lines allocated in F state. +.It Li L3_LINES_IN.ANY +.Pq Event 0AH , Umask 0FH +Counts the number of L3 lines allocated in any state. +.It Li L3_LINES_OUT.M_STATE +.Pq Event 0BH , Umask 01H +Counts the number of L3 lines victimized that were in the M state. When the +victim cache line is in M state, the line is written to its home cache agent +which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE +.Pq Event 0BH , Umask 02H +Counts the number of L3 lines victimized that were in the E state. +.It Li L3_LINES_OUT.S_STATE +.Pq Event 0BH , Umask 04H +Counts the number of L3 lines victimized that were in the S state. +.It Li L3_LINES_OUT.I_STATE +.Pq Event 0BH , Umask 08H +Counts the number of L3 lines victimized that were in the I state. +.It Li L3_LINES_OUT.F_STATE +.Pq Event 0BH , Umask 10H +Counts the number of L3 lines victimized that were in the F state. +.It Li L3_LINES_OUT.ANY +.Pq Event 0BH , Umask 1FH +Counts the number of L3 lines victimized in any state. +.It Li GQ_SNOOP.GOTO_S +.Pq Event 0CH , Umask 01H +Counts the number of remote snoops that have requested a cache line be set +to the S state. +.It Li GQ_SNOOP.GOTO_I +.Pq Event 0CH , Umask 02H +Counts the number of remote snoops that have requested a cache line be set +to the I state. +.It Li GQ_SNOOP.GOTO_S_HIT_E +.Pq Event 0CH , Umask 04H +Counts the number of remote snoops that have requested a cache line be set +to the S state from E state. +Requires writing MSR 301H with mask = 2H. +.It Li GQ_SNOOP.GOTO_S_HIT_F +.Pq Event 0CH , Umask 04H +Counts the number of remote snoops that have requested a cache line be set +to the S state from F (forward) state. +Requires writing MSR 301H with mask = 8H. +.It Li GQ_SNOOP.GOTO_S_HIT_M +.Pq Event 0CH , Umask 04H +Counts the number of remote snoops that have requested a cache line be set +to the S state from M state. +Requires writing MSR 301H with mask = 1H. +.It Li GQ_SNOOP.GOTO_S_HIT_S +.Pq Event 0CH , Umask 04H +Counts the number of remote snoops that have requested a cache line be set +to the S state from S state. +Requires writing MSR 301H with mask = 4H. +.It Li GQ_SNOOP.GOTO_I_HIT_E +.Pq Event 0CH , Umask 08H +Counts the number of remote snoops that have requested a cache line be set +to the I state from E state. +Requires writing MSR 301H with mask = 2H. +.It Li GQ_SNOOP.GOTO_I_HIT_F +.Pq Event 0CH , Umask 08H +Counts the number of remote snoops that have requested a cache line be set +to the I state from F (forward) state. +Requires writing MSR 301H with mask = 8H. +.It Li GQ_SNOOP.GOTO_I_HIT_M +.Pq Event 0CH , Umask 08H +Counts the number of remote snoops that have requested a cache line be set +to the I state from M state. +Requires writing MSR 301H with mask = 1H. +.It Li GQ_SNOOP.GOTO_I_HIT_S +.Pq Event 0CH , Umask 08H +Counts the number of remote snoops that have requested a cache line be set +to the I state from S state. +Requires writing MSR 301H with mask = 4H. +.It Li QHL_REQUESTS.IOH_READS +.Pq Event 20H , Umask 01H +Counts number of Quickpath Home Logic read requests from the IOH. +.It Li QHL_REQUESTS.IOH_WRITES +.Pq Event 20H , Umask 02H +Counts number of Quickpath Home Logic write requests from the IOH. +.It Li QHL_REQUESTS.REMOTE_READS +.Pq Event 20H , Umask 04H +Counts number of Quickpath Home Logic read requests from a remote socket. +.It Li QHL_REQUESTS.REMOTE_WRITES +.Pq Event 20H , Umask 08H +Counts number of Quickpath Home Logic write requests from a remote socket. +.It Li QHL_REQUESTS.LOCAL_READS +.Pq Event 20H , Umask 10H +Counts number of Quickpath Home Logic read requests from the local socket. +.It Li QHL_REQUESTS.LOCAL_WRITES +.Pq Event 20H , Umask 20H +Counts number of Quickpath Home Logic write requests from the local socket. +.It Li QHL_CYCLES_FULL.IOH +.Pq Event 21H , Umask 01H +Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE +.Pq Event 21H , Umask 02H +Counts uclk cycles all entries in the Quickpath Home Logic remote tracker +are full. +.It Li QHL_CYCLES_FULL.LOCAL +.Pq Event 21H , Umask 04H +Counts uclk cycles all entries in the Quickpath Home Logic local tracker are +full. +.It Li QHL_CYCLES_NOT_EMPTY.IOH +.Pq Event 22H , Umask 01H +Counts uclk cycles the Quickpath Home Logic IOH tracker is busy. +.It Li QHL_CYCLES_NOT_EMPTY.REMOTE +.Pq Event 22H , Umask 02H +Counts uclk cycles the Quickpath Home Logic remote tracker is +busy. +.It Li QHL_CYCLES_NOT_EMPTY.LOCAL +.Pq Event 22H , Umask 04H +Counts uclk cycles the Quickpath Home Logic local tracker is +busy. +.It Li QHL_OCCUPANCY.IOH +.Pq Event 23H , Umask 01H +QHL IOH tracker allocate to deallocate read occupancy. +.It Li QHL_OCCUPANCY.REMOTE +.Pq Event 23H , Umask 02H +QHL remote tracker allocate to deallocate read occupancy. +.It Li QHL_OCCUPANCY.LOCAL +.Pq Event 23H , Umask 04H +QHL local tracker allocate to deallocate read occupancy. +.It Li QHL_ADDRESS_CONFLICTS.2WAY +.Pq Event 24H , Umask 02H +Counts number of QHL Active Address Table (AAT) entries that saw a max of 2 +conflicts. The AAT is a structure that tracks requests that are in conflict. +The requests themselves are in the home tracker entries. The count is +reported when an AAT entry deallocates. +.It Li QHL_ADDRESS_CONFLICTS.3WAY +.Pq Event 24H , Umask 04H +Counts number of QHL Active Address Table (AAT) entries that saw a max of 3 +conflicts. The AAT is a structure that tracks requests that are in conflict. +The requests themselves are in the home tracker entries. The count is +reported when an AAT entry deallocates. +.It Li QHL_CONFLICT_CYCLES.IOH +.Pq Event 25H , Umask 01H +Counts cycles the Quickpath Home Logic IOH Tracker contains two or more +requests with an address conflict. A max of 3 requests can be in conflict. +.It Li QHL_CONFLICT_CYCLES.REMOTE +.Pq Event 25H , Umask 02H +Counts cycles the Quickpath Home Logic Remote Tracker contains two or more +requests with an address conflict. A max of 3 requests can be in conflict. +.It Li QHL_CONFLICT_CYCLES.LOCAL +.Pq Event 25H , Umask 04H +Counts cycles the Quickpath Home Logic Local Tracker contains two or more +requests with an address conflict. A max of 3 requests can be in conflict. +.It Li QHL_TO_QMC_BYPASS +.Pq Event 26H , Umask 01H +Counts number of requests to the Quickpath Memory Controller that bypass the +Quickpath Home Logic. All local accesses can be bypassed. For remote +requests, only read requests can be bypassed. +.It Li QMC_ISOC_FULL.READ.CH0 +.Pq Event 28H , Umask 01H +Counts cycles all the entries in the DRAM channel 0 high priority queue are +occupied with isochronous read requests. +.It Li QMC_ISOC_FULL.READ.CH1 +.Pq Event 28H , Umask 02H +Counts cycles all the entries in the DRAM channel 1 high priority queue are +occupied with isochronous read requests. +.It Li QMC_ISOC_FULL.READ.CH2 +.Pq Event 28H , Umask 04H +Counts cycles all the entries in the DRAM channel 2 high priority queue are +occupied with isochronous read requests. +.It Li QMC_ISOC_FULL.WRITE.CH0 +.Pq Event 28H , Umask 08H +Counts cycles all the entries in the DRAM channel 0 high priority queue are +occupied with isochronous write requests. +.It Li QMC_ISOC_FULL.WRITE.CH1 +.Pq Event 28H , Umask 10H +Counts cycles all the entries in the DRAM channel 1 high priority queue are +occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2 +.Pq Event 28H , Umask 20H +Counts cycles all the entries in the DRAM channel 2 high priority queue are +occupied with isochronous write requests. +.It Li QMC_BUSY.READ.CH0 +.Pq Event 29H , Umask 01H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +read request to DRAM channel 0. +.It Li QMC_BUSY.READ.CH1 +.Pq Event 29H , Umask 02H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +read request to DRAM channel 1. +.It Li QMC_BUSY.READ.CH2 +.Pq Event 29H , Umask 04H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +read request to DRAM channel 2. +.It Li QMC_BUSY.WRITE.CH0 +.Pq Event 29H , Umask 08H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +write request to DRAM channel 0. +.It Li QMC_BUSY.WRITE.CH1 +.Pq Event 29H , Umask 10H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +write request to DRAM channel 1. +.It Li QMC_BUSY.WRITE.CH2 +.Pq Event 29H , Umask 20H +Counts cycles where Quickpath Memory Controller has at least 1 outstanding +write request to DRAM channel 2. +.It Li QMC_OCCUPANCY.CH0 +.Pq Event 2AH , Umask 01H +IMC channel 0 normal read request occupancy. +.It Li QMC_OCCUPANCY.CH1 +.Pq Event 2AH , Umask 02H +IMC channel 1 normal read request occupancy. +.It Li QMC_OCCUPANCY.CH2 +.Pq Event 2AH , Umask 04H +IMC channel 2 normal read request occupancy. +.It Li QMC_OCCUPANCY.ANY +.Pq Event 2AH , Umask 07H +Normal read request occupancy for any channel. +.It Li QMC_ISSOC_OCCUPANCY.CH0 +.Pq Event 2BH , Umask 01H +IMC channel 0 issoc read request occupancy. +.It Li QMC_ISSOC_OCCUPANCY.CH1 +.Pq Event 2BH , Umask 02H +IMC channel 1 issoc read request occupancy. +.It Li QMC_ISSOC_OCCUPANCY.CH2 +.Pq Event 2BH , Umask 04H +IMC channel 2 issoc read request occupancy. +.It Li QMC_ISSOC_READS.ANY +.Pq Event 2BH , Umask 07H +IMC issoc read request occupancy. +.It Li QMC_NORMAL_READS.CH0 +.Pq Event 2CH , Umask 01H +Counts the number of Quickpath Memory Controller channel 0 medium and low +priority read requests. The QMC channel 0 normal read occupancy divided by +this count provides the average QMC channel 0 read latency. +.It Li QMC_NORMAL_READS.CH1 +.Pq Event 2CH , Umask 02H +Counts the number of Quickpath Memory Controller channel 1 medium and low +priority read requests. The QMC channel 1 normal read occupancy divided by +this count provides the average QMC channel 1 read latency. +.It Li QMC_NORMAL_READS.CH2 +.Pq Event 2CH , Umask 04H +Counts the number of Quickpath Memory Controller channel 2 medium and low +priority read requests. The QMC channel 2 normal read occupancy divided by +this count provides the average QMC channel 2 read latency. +.It Li QMC_NORMAL_READS.ANY +.Pq Event 2CH , Umask 07H +Counts the number of Quickpath Memory Controller medium and low priority +read requests. The QMC normal read occupancy divided by this count provides +the average QMC read latency. +.It Li QMC_HIGH_PRIORITY_READS.CH0 +.Pq Event 2DH , Umask 01H +Counts the number of Quickpath Memory Controller channel 0 high priority +isochronous read requests. +.It Li QMC_HIGH_PRIORITY_READS.CH1 +.Pq Event 2DH , Umask 02H +Counts the number of Quickpath Memory Controller channel 1 high priority +isochronous read requests. +.It Li QMC_HIGH_PRIORITY_READS.CH2 +.Pq Event 2DH , Umask 04H +Counts the number of Quickpath Memory Controller channel 2 high priority +isochronous read requests. 
+.It Li QMC_HIGH_PRIORITY_READS.ANY +.Pq Event 2DH , Umask 07H +Counts the number of Quickpath Memory Controller high priority isochronous +read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.CH0 +.Pq Event 2EH , Umask 01H +Counts the number of Quickpath Memory Controller channel 0 critical priority +isochronous read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.CH1 +.Pq Event 2EH , Umask 02H +Counts the number of Quickpath Memory Controller channel 1 critical priority +isochronous read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.CH2 +.Pq Event 2EH , Umask 04H +Counts the number of Quickpath Memory Controller channel 2 critical priority +isochronous read requests. +.It Li QMC_CRITICAL_PRIORITY_READS.ANY +.Pq Event 2EH , Umask 07H +Counts the number of Quickpath Memory Controller critical priority +isochronous read requests. +.It Li QMC_WRITES.FULL.CH0 +.Pq Event 2FH , Umask 01H +Counts number of full cache line writes to DRAM channel 0. +.It Li QMC_WRITES.FULL.CH1 +.Pq Event 2FH , Umask 02H +Counts number of full cache line writes to DRAM channel 1. +.It Li QMC_WRITES.FULL.CH2 +.Pq Event 2FH , Umask 04H +Counts number of full cache line writes to DRAM channel 2. +.It Li QMC_WRITES.FULL.ANY +.Pq Event 2FH , Umask 07H +Counts number of full cache line writes to DRAM. +.It Li QMC_WRITES.PARTIAL.CH0 +.Pq Event 2FH , Umask 08H +Counts number of partial cache line writes to DRAM channel 0. +.It Li QMC_WRITES.PARTIAL.CH1 +.Pq Event 2FH , Umask 10H +Counts number of partial cache line writes to DRAM channel 1. +.It Li QMC_WRITES.PARTIAL.CH2 +.Pq Event 2FH , Umask 20H +Counts number of partial cache line writes to DRAM channel 2. +.It Li QMC_WRITES.PARTIAL.ANY +.Pq Event 2FH , Umask 38H +Counts number of partial cache line writes to DRAM. +.It Li QMC_CANCEL.CH0 +.Pq Event 30H , Umask 01H +Counts number of DRAM channel 0 cancel requests. +.It Li QMC_CANCEL.CH1 +.Pq Event 30H , Umask 02H +Counts number of DRAM channel 1 cancel requests. +.It Li QMC_CANCEL.CH2 +.Pq Event 30H , Umask 04H +Counts number of DRAM channel 2 cancel requests. +.It Li QMC_CANCEL.ANY +.Pq Event 30H , Umask 07H +Counts number of DRAM cancel requests. +.It Li QMC_PRIORITY_UPDATES.CH0 +.Pq Event 31H , Umask 01H +Counts number of DRAM channel 0 priority updates. A priority update occurs +when an ISOC high or critical request is received by the QHL and there is a +matching request with normal priority that has already been issued to the +QMC. In this instance, the QHL will send a priority update to QMC to +expedite the request. +.It Li QMC_PRIORITY_UPDATES.CH1 +.Pq Event 31H , Umask 02H +Counts number of DRAM channel 1 priority updates. A priority update occurs +when an ISOC high or critical request is received by the QHL and there is a +matching request with normal priority that has already been issued to the +QMC. In this instance, the QHL will send a priority update to QMC to +expedite the request. +.It Li QMC_PRIORITY_UPDATES.CH2 +.Pq Event 31H , Umask 04H +Counts number of DRAM channel 2 priority updates. A priority update occurs +when an ISOC high or critical request is received by the QHL and there is a +matching request with normal priority that has already been issued to the +QMC. In this instance, the QHL will send a priority update to QMC to +expedite the request. +.It Li QMC_PRIORITY_UPDATES.ANY +.Pq Event 31H , Umask 07H +Counts number of DRAM priority updates. 
A priority update occurs when an +ISOC high or critical request is received by the QHL and there is a matching +request with normal priority that has already been issued to the QMC. In +this instance, the QHL will send a priority update to QMC to expedite the +request. +.It Li IMC_RETRY.CH0 +.Pq Event 32H , Umask 01H +Counts number of IMC DRAM channel 0 retries. DRAM retry only occurs when +configured in RAS mode. +.It Li IMC_RETRY.CH1 +.Pq Event 32H , Umask 02H +Counts number of IMC DRAM channel 1 retries. DRAM retry only occurs when +configured in RAS mode. +.It Li IMC_RETRY.CH2 +.Pq Event 32H , Umask 04H +Counts number of IMC DRAM channel 2 retries. DRAM retry only occurs when +configured in RAS mode. +.It Li IMC_RETRY.ANY +.Pq Event 32H , Umask 07H +Counts number of IMC DRAM retries from any channel. DRAM retry only occurs +when configured in RAS mode. +.It Li QHL_FRC_ACK_CNFLTS.IOH +.Pq Event 33H , Umask 01H +Counts number of Force Acknowledge Conflict messages sent by the Quickpath +Home Logic to the IOH. +.It Li QHL_FRC_ACK_CNFLTS.REMOTE +.Pq Event 33H , Umask 02H +Counts number of Force Acknowledge Conflict messages sent by the Quickpath +Home Logic to the remote home. +.It Li QHL_FRC_ACK_CNFLTS.LOCAL +.Pq Event 33H , Umask 04H +Counts number of Force Acknowledge Conflict messages sent by the Quickpath +Home Logic to the local home. +.It Li QHL_FRC_ACK_CNFLTS.ANY +.Pq Event 33H , Umask 07H +Counts number of Force Acknowledge Conflict messages sent by the Quickpath +Home Logic. +.It Li QHL_SLEEPS.IOH_ORDER +.Pq Event 34H , Umask 01H +Counts number of occurrences a request was put to sleep due to IOH ordering +(write after read) conflicts. While in the sleep state, the request is not +eligible to be scheduled to the QMC. +.It Li QHL_SLEEPS.REMOTE_ORDER +.Pq Event 34H , Umask 02H +Counts number of occurrences a request was put to sleep due to remote socket +ordering (write after read) conflicts. While in the sleep state, the request +is not eligible to be scheduled to the QMC. +.It Li QHL_SLEEPS.LOCAL_ORDER +.Pq Event 34H , Umask 04H +Counts number of occurrences a request was put to sleep due to local socket +ordering (write after read) conflicts. While in the sleep state, the request +is not eligible to be scheduled to the QMC. +.It Li QHL_SLEEPS.IOH_CONFLICT +.Pq Event 34H , Umask 08H +Counts number of occurrences a request was put to sleep due to IOH address +conflicts. While in the sleep state, the request is not eligible to be +scheduled to the QMC. +.It Li QHL_SLEEPS.REMOTE_CONFLICT +.Pq Event 34H , Umask 10H +Counts number of occurrences a request was put to sleep due to remote socket +address conflicts. While in the sleep state, the request is not eligible to +be scheduled to the QMC. +.It Li QHL_SLEEPS.LOCAL_CONFLICT +.Pq Event 34H , Umask 20H +Counts number of occurrences a request was put to sleep due to local socket +address conflicts. While in the sleep state, the request is not eligible to +be scheduled to the QMC. +.It Li ADDR_OPCODE_MATCH.IOH +.Pq Event 35H , Umask 01H +Counts number of requests from the IOH, address/opcode of request is +qualified by mask value written to MSR 396H. The following mask values are +supported: +0: NONE, 40000000_00000000H: RSPFWDI, 40001A00_00000000H: RSPFWDS, +40001D00_00000000H: RSPIWB. +Match opcode/address by writing MSR 396H with a supported mask value. +.It Li ADDR_OPCODE_MATCH.REMOTE +.Pq Event 35H , Umask 02H +Counts number of requests from the remote socket, address/opcode of request +is qualified by mask value written to MSR 396H.
The following mask values +are supported: +0: NONE, 40000000_00000000H: RSPFWDI, 40001A00_00000000H: RSPFWDS, +40001D00_00000000H: RSPIWB. +Match opcode/address by writing MSR 396H with a supported mask value. +.It Li ADDR_OPCODE_MATCH.LOCAL +.Pq Event 35H , Umask 04H +Counts number of requests from the local socket, address/opcode of request +is qualified by mask value written to MSR 396H. The following mask values +are supported: +0: NONE, 40000000_00000000H: RSPFWDI, 40001A00_00000000H: RSPFWDS, +40001D00_00000000H: RSPIWB. +Match opcode/address by writing MSR 396H with a supported mask value. +.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0 +.Pq Event 40H , Umask 01H +Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0 +.Pq Event 40H , Umask 02H +Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0 +.Pq Event 40H , Umask 04H +Counts cycles the Quickpath outbound link 0 non-data response virtual +channel is stalled due to lack of a VNA and VN0 credit. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1 +.Pq Event 40H , Umask 08H +Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1 +.Pq Event 40H , Umask 10H +Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled +due to lack of a VNA and VN0 credit. Note that this event does not filter +out when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1 +.Pq Event 40H , Umask 20H +Counts cycles the Quickpath outbound link 1 non-data response virtual +channel is stalled due to lack of a VNA and VN0 credit. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0 +.Pq Event 40H , Umask 07H +Counts cycles the Quickpath outbound link 0 virtual channels are stalled due +to lack of a VNA and VN0 credit. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1 +.Pq Event 40H , Umask 38H +Counts cycles the Quickpath outbound link 1 virtual channels are stalled due +to lack of a VNA and VN0 credit. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0 +.Pq Event 41H , Umask 01H +Counts cycles the Quickpath outbound link 0 Data Response virtual channel is +stalled due to lack of VNA and VN0 credits.
Note that this event does not +filter out when a flit would not have been selected for arbitration because +another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0 +.Pq Event 41H , Umask 02H +Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0 +.Pq Event 41H , Umask 04H +Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1 +.Pq Event 41H , Umask 08H +Counts cycles the Quickpath outbound link 1 Data Response virtual channel is +stalled due to lack of VNA and VN0 credits. Note that this event does not +filter out when a flit would not have been selected for arbitration because +another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1 +.Pq Event 41H , Umask 10H +Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1 +.Pq Event 41H , Umask 20H +Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual +channel is stalled due to lack of VNA and VN0 credits. Note that this event +does not filter out when a flit would not have been selected for arbitration +because another virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0 +.Pq Event 41H , Umask 07H +Counts cycles the Quickpath outbound link 0 virtual channels are stalled due +to lack of VNA and VN0 credits. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1 +.Pq Event 41H , Umask 38H +Counts cycles the Quickpath outbound link 1 virtual channels are stalled due +to lack of VNA and VN0 credits. Note that this event does not filter out +when a flit would not have been selected for arbitration because another +virtual channel is getting arbitrated. +.It Li QPI_TX_HEADER.FULL.LINK_0 +.Pq Event 42H , Umask 01H +Number of cycles that the header buffer in the Quickpath Interface outbound +link 0 is full. +.It Li QPI_TX_HEADER.BUSY.LINK_0 +.Pq Event 42H , Umask 02H +Number of cycles that the header buffer in the Quickpath Interface outbound +link 0 is busy. +.It Li QPI_TX_HEADER.FULL.LINK_1 +.Pq Event 42H , Umask 04H +Number of cycles that the header buffer in the Quickpath Interface outbound +link 1 is full. +.It Li QPI_TX_HEADER.BUSY.LINK_1 +.Pq Event 42H , Umask 08H +Number of cycles that the header buffer in the Quickpath Interface outbound +link 1 is busy. +.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0 +.Pq Event 43H , Umask 01H +Number of cycles that snoop packets incoming to the Quickpath Interface link +0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) +does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1 +.Pq Event 43H , Umask 02H +Number of cycles that snoop packets incoming to the Quickpath Interface link +1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) +does not have any available entries. +.It Li DRAM_OPEN.CH0 +.Pq Event 60H , Umask 01H +Counts number of DRAM Channel 0 open commands issued either for read or +write. To read or write data, the referenced DRAM page must first be opened. +.It Li DRAM_OPEN.CH1 +.Pq Event 60H , Umask 02H +Counts number of DRAM Channel 1 open commands issued either for read or +write. To read or write data, the referenced DRAM page must first be opened. +.It Li DRAM_OPEN.CH2 +.Pq Event 60H , Umask 04H +Counts number of DRAM Channel 2 open commands issued either for read or +write. To read or write data, the referenced DRAM page must first be opened. +.It Li DRAM_PAGE_CLOSE.CH0 +.Pq Event 61H , Umask 01H +DRAM channel 0 command issued to CLOSE a page due to page idle timer +expiration. Closing a page is done by issuing a precharge. +.It Li DRAM_PAGE_CLOSE.CH1 +.Pq Event 61H , Umask 02H +DRAM channel 1 command issued to CLOSE a page due to page idle timer +expiration. Closing a page is done by issuing a precharge. +.It Li DRAM_PAGE_CLOSE.CH2 +.Pq Event 61H , Umask 04H +DRAM channel 2 command issued to CLOSE a page due to page idle timer +expiration. Closing a page is done by issuing a precharge. +.It Li DRAM_PAGE_MISS.CH0 +.Pq Event 62H , Umask 01H +Counts the number of precharges (PRE) that were issued to DRAM channel 0 +because there was a page miss. A page miss refers to a situation in which a +page is currently open and another page from the same bank needs to be +opened. The new page experiences a page miss. Closing of the old page is +done by issuing a precharge. +.It Li DRAM_PAGE_MISS.CH1 +.Pq Event 62H , Umask 02H +Counts the number of precharges (PRE) that were issued to DRAM channel 1 +because there was a page miss. A page miss refers to a situation in which a +page is currently open and another page from the same bank needs to be +opened. The new page experiences a page miss. Closing of the old page is +done by issuing a precharge. +.It Li DRAM_PAGE_MISS.CH2 +.Pq Event 62H , Umask 04H +Counts the number of precharges (PRE) that were issued to DRAM channel 2 +because there was a page miss. A page miss refers to a situation in which a +page is currently open and another page from the same bank needs to be +opened. The new page experiences a page miss. Closing of the old page is +done by issuing a precharge. +.It Li DRAM_READ_CAS.CH0 +.Pq Event 63H , Umask 01H +Counts the number of times a read CAS command was issued on DRAM channel 0. +.It Li DRAM_READ_CAS.AUTOPRE_CH0 +.Pq Event 63H , Umask 02H +Counts the number of times a read CAS command was issued on DRAM channel 0 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_READ_CAS.CH1 +.Pq Event 63H , Umask 04H +Counts the number of times a read CAS command was issued on DRAM channel 1. +.It Li DRAM_READ_CAS.AUTOPRE_CH1 +.Pq Event 63H , Umask 08H +Counts the number of times a read CAS command was issued on DRAM channel 1 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_READ_CAS.CH2 +.Pq Event 63H , Umask 10H +Counts the number of times a read CAS command was issued on DRAM channel 2. 
+.It Li DRAM_READ_CAS.AUTOPRE_CH2 +.Pq Event 63H , Umask 20H +Counts the number of times a read CAS command was issued on DRAM channel 2 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_WRITE_CAS.CH0 +.Pq Event 64H , Umask 01H +Counts the number of times a write CAS command was issued on DRAM channel 0. +.It Li DRAM_WRITE_CAS.AUTOPRE_CH0 +.Pq Event 64H , Umask 02H +Counts the number of times a write CAS command was issued on DRAM channel 0 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_WRITE_CAS.CH1 +.Pq Event 64H , Umask 04H +Counts the number of times a write CAS command was issued on DRAM channel 1. +.It Li DRAM_WRITE_CAS.AUTOPRE_CH1 +.Pq Event 64H , Umask 08H +Counts the number of times a write CAS command was issued on DRAM channel 1 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_WRITE_CAS.CH2 +.Pq Event 64H , Umask 10H +Counts the number of times a write CAS command was issued on DRAM channel 2. +.It Li DRAM_WRITE_CAS.AUTOPRE_CH2 +.Pq Event 64H , Umask 20H +Counts the number of times a write CAS command was issued on DRAM channel 2 +where the command issued used the auto-precharge (auto page close) mode. +.It Li DRAM_REFRESH.CH0 +.Pq Event 65H , Umask 01H +Counts number of DRAM channel 0 refresh commands. DRAM loses data content +over time. In order to keep correct data content, the data values have to be +refreshed periodically. +.It Li DRAM_REFRESH.CH1 +.Pq Event 65H , Umask 02H +Counts number of DRAM channel 1 refresh commands. DRAM loses data content +over time. In order to keep correct data content, the data values have to be +refreshed periodically. +.It Li DRAM_REFRESH.CH2 +.Pq Event 65H , Umask 04H +Counts number of DRAM channel 2 refresh commands. DRAM loses data content +over time. In order to keep correct data content, the data values have to be +refreshed periodically. +.It Li DRAM_PRE_ALL.CH0 +.Pq Event 66H , Umask 01H +Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close +all open pages in a rank. PREALL is issued when the DRAM needs to be +refreshed or needs to go into a power down mode. +.It Li DRAM_PRE_ALL.CH1 +.Pq Event 66H , Umask 02H +Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close +all open pages in a rank. PREALL is issued when the DRAM needs to be +refreshed or needs to go into a power down mode. +.It Li DRAM_PRE_ALL.CH2 +.Pq Event 66H , Umask 04H +Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close +all open pages in a rank. PREALL is issued when the DRAM needs to be +refreshed or needs to go into a power down mode. +.It Li DRAM_THERMAL_THROTTLED +.Pq Event 67H , Umask 01H +Uncore cycles DRAM was throttled due to its temperature being above the +thermal throttling threshold. +.It Li THERMAL_THROTTLING_TEMP.CORE_0 +.Pq Event 80H , Umask 01H +Cycles that the PCU records that core 0 is above the thermal throttling +threshold temperature. +.It Li THERMAL_THROTTLING_TEMP.CORE_1 +.Pq Event 80H , Umask 02H +Cycles that the PCU records that core 1 is above the thermal throttling +threshold temperature. +.It Li THERMAL_THROTTLING_TEMP.CORE_2 +.Pq Event 80H , Umask 04H +Cycles that the PCU records that core 2 is above the thermal throttling +threshold temperature. +.It Li THERMAL_THROTTLING_TEMP.CORE_3 +.Pq Event 80H , Umask 08H +Cycles that the PCU records that core 3 is above the thermal throttling +threshold temperature. 
+.It Li THERMAL_THROTTLED_TEMP.CORE_0 +.Pq Event 81H , Umask 01H +Cycles that the PCU records that core 0 is in the power throttled state due +to the core's temperature being above the thermal throttling threshold. +.It Li THERMAL_THROTTLED_TEMP.CORE_1 +.Pq Event 81H , Umask 02H +Cycles that the PCU records that core 1 is in the power throttled state due +to the core's temperature being above the thermal throttling threshold. +.It Li THERMAL_THROTTLED_TEMP.CORE_2 +.Pq Event 81H , Umask 04H +Cycles that the PCU records that core 2 is in the power throttled state due +to the core's temperature being above the thermal throttling threshold. +.It Li THERMAL_THROTTLED_TEMP.CORE_3 +.Pq Event 81H , Umask 08H +Cycles that the PCU records that core 3 is in the power throttled state due +to the core's temperature being above the thermal throttling threshold. +.It Li PROCHOT_ASSERTION +.Pq Event 82H , Umask 01H +Number of system assertions of PROCHOT indicating the entire processor has +exceeded the thermal limit. +.It Li THERMAL_THROTTLING_PROCHOT.CORE_0 +.Pq Event 83H , Umask 01H +Cycles that the PCU records that core 0 is in a low power state due to the +system asserting PROCHOT because the entire processor has exceeded the thermal +limit. +.It Li THERMAL_THROTTLING_PROCHOT.CORE_1 +.Pq Event 83H , Umask 02H +Cycles that the PCU records that core 1 is in a low power state due to the +system asserting PROCHOT because the entire processor has exceeded the thermal +limit. +.It Li THERMAL_THROTTLING_PROCHOT.CORE_2 +.Pq Event 83H , Umask 04H +Cycles that the PCU records that core 2 is in a low power state due to the +system asserting PROCHOT because the entire processor has exceeded the thermal +limit. +.It Li THERMAL_THROTTLING_PROCHOT.CORE_3 +.Pq Event 83H , Umask 08H +Cycles that the PCU records that core 3 is in a low power state due to the +system asserting PROCHOT because the entire processor has exceeded the thermal +limit. +.It Li TURBO_MODE.CORE_0 +.Pq Event 84H , Umask 01H +Uncore cycles that core 0 is operating in turbo mode. +.It Li TURBO_MODE.CORE_1 +.Pq Event 84H , Umask 02H +Uncore cycles that core 1 is operating in turbo mode. +.It Li TURBO_MODE.CORE_2 +.Pq Event 84H , Umask 04H +Uncore cycles that core 2 is operating in turbo mode. +.It Li TURBO_MODE.CORE_3 +.Pq Event 84H , Umask 08H +Uncore cycles that core 3 is operating in turbo mode. +.It Li CYCLES_UNHALTED_L3_FLL_ENABLE +.Pq Event 85H , Umask 02H +Uncore cycles that at least one core is unhalted and all L3 ways are +enabled. +.It Li CYCLES_UNHALTED_L3_FLL_DISABLE +.Pq Event 86H , Umask 01H +Uncore cycles that at least one core is unhalted and all L3 ways are +disabled. +.El +.Sh SEE ALSO +.Xr pmc 3 , +.Xr pmc.atom 3 , +.Xr pmc.core 3 , +.Xr pmc.iaf 3 , +.Xr pmc.ucf 3 , +.Xr pmc.k7 3 , +.Xr pmc.k8 3 , +.Xr pmc.p4 3 , +.Xr pmc.p5 3 , +.Xr pmc.p6 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , +.Xr pmc.westmere 3 , +.Xr pmc.tsc 3 , +.Xr pmc_cpuinfo 3 , +.Xr pmclog 3 , +.Xr hwpmc 4 +.Sh HISTORY +The +.Nm pmc +library first appeared in +.Fx 6.0 . +.Sh AUTHORS +The +.Lb libpmc +library was written by +.An "Joseph Koshy" +.Aq jkoshy@FreeBSD.org . -- cgit v1.1 From 79412ab1a15b9e5939e48ea214459a68d9ec5193 Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 18 Apr 2010 03:52:41 +0000 Subject: MFC r206152, r206153, r206154: - Stop adding trailing '\n'. The servent_unpack() doesn't expect lines terminated with '\n'. - Treat '+' as special only when in compat mode, and simplify the logic a bit. - Reduce duplicate code.
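For context, an editorial sketch (not part of the original commit) of how this code is reached: callers use the standard netdb interfaces, which dispatch to the files/db/nis backends according to nsswitch.conf(5), so the refactoring below is invisible to them.

	#include <stdio.h>
	#include <netdb.h>
	#include <arpa/inet.h>

	int
	main(void)
	{
		/* Dispatches to the configured nsswitch backends. */
		struct servent *sp = getservbyname("ssh", "tcp");

		if (sp == NULL)
			return (1);
		/* s_port is stored in network byte order. */
		printf("%s/%s: port %d\n", sp->s_name, sp->s_proto,
		    ntohs(sp->s_port));
		return (0);
	}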
--- lib/libc/net/getservent.c | 106 +++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/getservent.c b/lib/libc/net/getservent.c index ebb7a7d..3dcad3f 100644 --- a/lib/libc/net/getservent.c +++ b/lib/libc/net/getservent.c @@ -207,6 +207,32 @@ servent_unpack(char *p, struct servent *serv, char **aliases, return 0; } +static int +parse_result(struct servent *serv, char *buffer, size_t bufsize, + char *resultbuf, size_t resultbuflen, int *errnop) +{ + char **aliases; + int aliases_size; + + if (bufsize <= resultbuflen + _ALIGNBYTES + sizeof(char *)) { + *errnop = ERANGE; + return (NS_RETURN); + } + aliases = (char **)_ALIGN(&buffer[resultbuflen + 1]); + aliases_size = (buffer + bufsize - (char *)aliases) / sizeof(char *); + if (aliases_size < 1) { + *errnop = ERANGE; + return (NS_RETURN); + } + + memcpy(buffer, resultbuf, resultbuflen); + buffer[resultbuflen] = '\0'; + + if (servent_unpack(buffer, serv, aliases, aliases_size, errnop) != 0) + return ((*errnop == 0) ? NS_NOTFOUND : NS_RETURN); + return (NS_SUCCESS); +} + /* files backend implementation */ static void files_endstate(void *p) @@ -258,8 +284,6 @@ files_servent(void *retval, void *mdata, va_list ap) size_t bufsize; int *errnop; - char **aliases; - int aliases_size; size_t linesize; char *line; char **cp; @@ -315,28 +339,8 @@ files_servent(void *retval, void *mdata, va_list ap) break; } - if (*line=='+') { - if (serv_mdata->compat_mode != 0) - st->compat_mode_active = 1; - } else { - if (bufsize <= linesize + _ALIGNBYTES + - sizeof(char *)) { - *errnop = ERANGE; - rv = NS_RETURN; - break; - } - aliases = (char **)_ALIGN(&buffer[linesize+1]); - aliases_size = (buffer + bufsize - - (char *)aliases) / sizeof(char *); - if (aliases_size < 1) { - *errnop = ERANGE; - rv = NS_RETURN; - break; - } - - memcpy(buffer, line, linesize); - buffer[linesize] = '\0'; - } + if (*line=='+' && serv_mdata->compat_mode != 0) + st->compat_mode_active = 1; } if (st->compat_mode_active != 0) { @@ -367,18 +371,12 @@ files_servent(void *retval, void *mdata, va_list ap) continue; } - rv = servent_unpack(buffer, serv, aliases, aliases_size, + rv = parse_result(serv, buffer, bufsize, line, linesize, errnop); - if (rv !=0 ) { - if (*errnop == 0) { - rv = NS_NOTFOUND; - continue; - } - else { - rv = NS_RETURN; - break; - } - } + if (rv == NS_NOTFOUND) + continue; + if (rv == NS_RETURN) + break; rv = NS_NOTFOUND; switch (serv_mdata->how) { @@ -486,9 +484,6 @@ nis_servent(void *retval, void *mdata, va_list ap) size_t bufsize; int *errnop; - char **aliases; - int aliases_size; - name = NULL; proto = NULL; how = (enum nss_lookup_type)mdata; @@ -594,39 +589,8 @@ nis_servent(void *retval, void *mdata, va_list ap) break; }; - /* we need a room for additional \n symbol */ - if (bufsize <= - resultbuflen + 1 + _ALIGNBYTES + sizeof(char *)) { - *errnop = ERANGE; - rv = NS_RETURN; - break; - } - - aliases = (char **)_ALIGN(&buffer[resultbuflen + 2]); - aliases_size = - (buffer + bufsize - (char *)aliases) / sizeof(char *); - if (aliases_size < 1) { - *errnop = ERANGE; - rv = NS_RETURN; - break; - } - - /* - * servent_unpack expects lines terminated with \n -- - * make it happy - */ - memcpy(buffer, resultbuf, resultbuflen); - buffer[resultbuflen] = '\n'; - buffer[resultbuflen + 1] = '\0'; - - if (servent_unpack(buffer, serv, aliases, aliases_size, - errnop) != 0) { - if (*errnop == 0) - rv = NS_NOTFOUND; - else - rv = NS_RETURN; - } else - rv = NS_SUCCESS; + rv = parse_result(serv, 
buffer, bufsize, resultbuf, + resultbuflen, errnop); free(resultbuf); } while (!(rv & NS_TERMINATE) && how == nss_lt_all); -- cgit v1.1 From 50edecc45441eac7385beca569f1da14f1b9a490 Mon Sep 17 00:00:00 2001 From: ume Date: Sun, 18 Apr 2010 04:07:32 +0000 Subject: MFC r206155, r206267: Add capability to use a db version of services. It is enabled by specifying `db' as source of services in /etc/nsswitch.conf. --- lib/libc/net/getservent.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++ lib/libc/net/nsdispatch.3 | 3 +- 2 files changed, 202 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/net/getservent.c b/lib/libc/net/getservent.c index 3dcad3f..3228bdc 100644 --- a/lib/libc/net/getservent.c +++ b/lib/libc/net/getservent.c @@ -37,7 +37,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include #include #include #include @@ -94,6 +96,19 @@ NSS_TLS_HANDLING(files); static int files_servent(void *, void *, va_list); static int files_setservent(void *, void *, va_list); +/* db backend declarations */ +struct db_state +{ + DB *db; + int stayopen; + int keynum; +}; +static void db_endstate(void *); +NSS_TLS_HANDLING(db); + +static int db_servent(void *, void *, va_list); +static int db_setservent(void *, void *, va_list); + #ifdef YP /* nis backend declarations */ static int nis_servent(void *, void *, va_list); @@ -263,6 +278,8 @@ files_servent(void *retval, void *mdata, va_list ap) { NULL, 0 } }; ns_dtab compat_dtab[] = { + { NSSRC_DB, db_servent, + (void *)((struct servent_mdata *)mdata)->how }, #ifdef YP { NSSRC_NIS, nis_servent, (void *)((struct servent_mdata *)mdata)->how }, @@ -452,6 +469,183 @@ files_setservent(void *retval, void *mdata, va_list ap) return (NS_UNAVAIL); } +/* db backend implementation */ +static void +db_endstate(void *p) +{ + DB *db; + + if (p == NULL) + return; + + db = ((struct db_state *)p)->db; + if (db != NULL) + db->close(db); + + free(p); +} + +static int +db_servent(void *retval, void *mdata, va_list ap) +{ + char buf[BUFSIZ]; + DBT key, data, *result; + DB *db; + + struct db_state *st; + int rv; + int stayopen; + + enum nss_lookup_type how; + char *name; + char *proto; + int port; + + struct servent *serv; + char *buffer; + size_t bufsize; + int *errnop; + + name = NULL; + proto = NULL; + how = (enum nss_lookup_type)mdata; + switch (how) { + case nss_lt_name: + name = va_arg(ap, char *); + proto = va_arg(ap, char *); + break; + case nss_lt_id: + port = va_arg(ap, int); + proto = va_arg(ap, char *); + break; + case nss_lt_all: + break; + default: + return NS_NOTFOUND; + }; + + serv = va_arg(ap, struct servent *); + buffer = va_arg(ap, char *); + bufsize = va_arg(ap, size_t); + errnop = va_arg(ap,int *); + + *errnop = db_getstate(&st); + if (*errnop != 0) + return (NS_UNAVAIL); + + if (how == nss_lt_all && st->keynum < 0) + return (NS_NOTFOUND); + + if (st->db == NULL) { + st->db = dbopen(_PATH_SERVICES_DB, O_RDONLY, 0, DB_HASH, NULL); + if (st->db == NULL) { + *errnop = errno; + return (NS_UNAVAIL); + } + } + + stayopen = (how == nss_lt_all) ? 
1 : st->stayopen; + db = st->db; + + do { + switch (how) { + case nss_lt_name: + key.data = buf; + if (proto == NULL) + key.size = snprintf(buf, sizeof(buf), + "\376%s", name); + else + key.size = snprintf(buf, sizeof(buf), + "\376%s/%s", name, proto); + key.size++; + if (db->get(db, &key, &data, 0) != 0 || + db->get(db, &data, &key, 0) != 0) { + rv = NS_NOTFOUND; + goto db_fin; + } + result = &key; + break; + case nss_lt_id: + key.data = buf; + port = htons(port); + if (proto == NULL) + key.size = snprintf(buf, sizeof(buf), + "\377%d", port); + else + key.size = snprintf(buf, sizeof(buf), + "\377%d/%s", port, proto); + key.size++; + if (db->get(db, &key, &data, 0) != 0 || + db->get(db, &data, &key, 0) != 0) { + rv = NS_NOTFOUND; + goto db_fin; + } + result = &key; + break; + case nss_lt_all: + key.data = buf; + key.size = snprintf(buf, sizeof(buf), "%d", + st->keynum++); + key.size++; + if (db->get(db, &key, &data, 0) != 0) { + st->keynum = -1; + rv = NS_NOTFOUND; + goto db_fin; + } + result = &data; + break; + } + + rv = parse_result(serv, buffer, bufsize, result->data, + result->size - 1, errnop); + + } while (!(rv & NS_TERMINATE) && how == nss_lt_all); + +db_fin: + if (!stayopen && st->db != NULL) { + db->close(db); + st->db = NULL; + } + + if (rv == NS_SUCCESS && retval != NULL) + *(struct servent **)retval = serv; + + return (rv); +} + +static int +db_setservent(void *retval, void *mdata, va_list ap) +{ + DB *db; + struct db_state *st; + int rv; + int f; + + rv = db_getstate(&st); + if (rv != 0) + return (NS_UNAVAIL); + + switch ((enum constants)mdata) { + case SETSERVENT: + f = va_arg(ap, int); + st->stayopen |= f; + st->keynum = 0; + break; + case ENDSERVENT: + db = st->db; + if (db != NULL) { + db->close(db); + st->db = NULL; + } + st->stayopen = 0; + break; + default: + break; + }; + + return (NS_UNAVAIL); +} + /* nis backend implementation */ #ifdef YP static void @@ -638,6 +832,7 @@ compat_setservent(void *retval, void *mdata, va_list ap) { NULL, 0 } }; ns_dtab compat_dtab[] = { + { NSSRC_DB, db_setservent, mdata }, #ifdef YP { NSSRC_NIS, nis_setservent, mdata }, #endif @@ -924,6 +1119,7 @@ getservbyname_r(const char *name, const char *proto, struct servent *serv, #endif /* NS_CACHING */ static const ns_dtab dtab[] = { { NSSRC_FILES, files_servent, (void *)&mdata }, + { NSSRC_DB, db_servent, (void *)nss_lt_name }, #ifdef YP { NSSRC_NIS, nis_servent, (void *)nss_lt_name }, #endif @@ -960,6 +1156,7 @@ getservbyport_r(int port, const char *proto, struct servent *serv, #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_servent, (void *)&mdata }, + { NSSRC_DB, db_servent, (void *)nss_lt_id }, #ifdef YP { NSSRC_NIS, nis_servent, (void *)nss_lt_id }, #endif @@ -995,6 +1192,7 @@ getservent_r(struct servent *serv, char *buffer, size_t bufsize, #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_servent, (void *)&mdata }, + { NSSRC_DB, db_servent, (void *)nss_lt_all }, #ifdef YP { NSSRC_NIS, nis_servent, (void *)nss_lt_all }, #endif @@ -1027,6 +1225,7 @@ setservent(int stayopen) #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_setservent, (void *)SETSERVENT }, + { NSSRC_DB, db_setservent, (void *)SETSERVENT }, #ifdef YP { NSSRC_NIS, nis_setservent, (void *)SETSERVENT }, #endif @@ -1051,6 +1250,7 @@ endservent() #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_setservent, (void *)ENDSERVENT }, + { NSSRC_DB, db_setservent, (void *)ENDSERVENT }, #ifdef YP { NSSRC_NIS, nis_setservent, (void *)ENDSERVENT }, #endif diff --git a/lib/libc/net/nsdispatch.3 
b/lib/libc/net/nsdispatch.3
index cf15002..8ae7540 100644
--- a/lib/libc/net/nsdispatch.3
+++ b/lib/libc/net/nsdispatch.3
@@ -32,7 +32,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 22, 2007
+.Dd April 4, 2010
 .Dt NSDISPATCH 3
 .Os
 .Sh NAME
@@ -176,6 +176,7 @@ While there is support for arbitrary sources, the following
 .Bl -column NSSRC_COMPAT compat -offset indent
 .It Sy "#define value"
 .It Dv NSSRC_FILES Ta """files""
+.It Dv NSSRC_DB Ta """db""
 .It Dv NSSRC_DNS Ta """dns""
 .It Dv NSSRC_NIS Ta """nis""
 .It Dv NSSRC_COMPAT Ta """compat""
--
cgit v1.1


From 32d0c03b86b06a7797753e8d40aaea46fae6ce29 Mon Sep 17 00:00:00 2001
From: delphij
Date: Tue, 20 Apr 2010 22:57:05 +0000
Subject: MFC assembler version of match functions for amd64 and i386(*).

This gives approximately a 15% improvement in the compression case.

(*) The i386 assembler version is enabled ONLY when MACHINE_CPU contains
'i686', which is not the default on FreeBSD/i386.  One can specify, for
instance, CPUTYPE=pentium4 in /etc/make.conf to get this feature.
---
 lib/libz/Makefile                      |  12 +
 lib/libz/contrib/README.contrib        |  77 +++++
 lib/libz/contrib/asm686/README.686     |  51 +++
 lib/libz/contrib/asm686/match.S        | 343 ++++++++++++++++++++
 lib/libz/contrib/gcc_gvmat64/gvmat64.S | 574 +++++++++++++++++++++++++++++++++
 5 files changed, 1057 insertions(+)
 create mode 100644 lib/libz/contrib/README.contrib
 create mode 100644 lib/libz/contrib/asm686/README.686
 create mode 100644 lib/libz/contrib/asm686/match.S
 create mode 100644 lib/libz/contrib/gcc_gvmat64/gvmat64.S

(limited to 'lib')

diff --git a/lib/libz/Makefile b/lib/libz/Makefile
index 63383b4..1390d08 100644
--- a/lib/libz/Makefile
+++ b/lib/libz/Makefile
@@ -19,6 +19,18 @@ SRCS = adler32.c compress.c crc32.c gzio.c uncompr.c deflate.c trees.c \
 	zutil.c inflate.c inftrees.c inffast.c zopen.c infback.c
 INCS=	zconf.h zlib.h

+.if ${MACHINE_ARCH} == "i386" && ${MACHINE_CPU:M*i686*}
+.PATH: ${.CURDIR}/contrib/asm686
+SRCS+=	match.S
+CFLAGS+= -DASMV -DNO_UNDERLINE
+.endif
+
+.if ${MACHINE_ARCH} == "amd64"
+.PATH: ${.CURDIR}/contrib/gcc_gvmat64
+SRCS+=	gvmat64.S
+CFLAGS+= -DASMV -DNO_UNDERLINE
+.endif
+
 minigzip: all minigzip.o
 	$(CC) -o minigzip minigzip.o -L. -lz

diff --git a/lib/libz/contrib/README.contrib b/lib/libz/contrib/README.contrib
new file mode 100644
index 0000000..dd2285d
--- /dev/null
+++ b/lib/libz/contrib/README.contrib
@@ -0,0 +1,77 @@
+All files under this contrib directory are UNSUPPORTED.  They were
+provided by users of zlib and were not tested by the authors of zlib.
+Use at your own risk.  Please contact the authors of the contributions
+for help about these, not the zlib authors.  Thanks.
+
+
+ada/        by Dmitriy Anisimkov
+            Support for Ada
+            See http://zlib-ada.sourceforge.net/
+
+amd64/      by Mikhail Teterin
+            asm code for AMD64
+            See patch at http://www.freebsd.org/cgi/query-pr.cgi?pr=bin/96393
+
+asm686/     by Brian Raiter
+            asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax
+            See http://www.muppetlabs.com/~breadbox/software/assembly.html
+
+blast/      by Mark Adler
+            Decompressor for output of PKWare Data Compression Library (DCL)
+
+delphi/     by Cosmin Truta
+            Support for Delphi and C++ Builder
+
+dotzlib/    by Henrik Ravn
+            Support for Microsoft .Net and Visual C++ .Net
+
+gcc_gvmat64/by Gilles Vollant
+            GCC Version of x86 64-bit (AMD64 and Intel EM64t) code for x64
+            assembler to replace longest_match() and inflate_fast()
+
+infback9/   by Mark Adler
+            Unsupported diffs to infback to decode the deflate64 format
+
+inflate86/  by Chris Anderson
+            Tuned x86 gcc asm code to replace inflate_fast()
+
+iostream/   by Kevin Ruland
+            A C++ I/O streams interface to the zlib gz* functions
+
+iostream2/  by Tyge Løvset
+            Another C++ I/O streams interface
+
+iostream3/  by Ludwig Schwardt
+            and Kevin Ruland
+            Yet another C++ I/O streams interface
+
+masmx64/    by Gilles Vollant
+            x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to
+            replace longest_match() and inflate_fast(), also masm x86
+            64-bits translation of Chris Anderson inflate_fast()
+
+masmx86/    by Gilles Vollant
+            x86 asm code to replace longest_match() and inflate_fast(),
+            for Visual C++ and MASM (32 bits).
+            Based on Brian Raiter (asm686) and Chris Anderson (inflate86)
+
+minizip/    by Gilles Vollant
+            Mini zip and unzip based on zlib
+            Includes Zip64 support by Mathias Svensson
+            See http://www.winimage.com/zLibDll/unzip.html
+
+pascal/     by Bob Dellaca et al.
+            Support for Pascal
+
+puff/       by Mark Adler
+            Small, low memory usage inflate.  Also serves to provide an
+            unambiguous description of the deflate format.
+
+testzlib/   by Gilles Vollant
+            Example of the use of zlib
+
+untgz/      by Pedro A. Aranda Gutierrez
+            A very simple tar.gz file extractor using zlib
+
+vstudio/    by Gilles Vollant
+            Building a minizip-enhanced zlib with Microsoft Visual Studio
diff --git a/lib/libz/contrib/asm686/README.686 b/lib/libz/contrib/asm686/README.686
new file mode 100644
index 0000000..a0bf3be
--- /dev/null
+++ b/lib/libz/contrib/asm686/README.686
@@ -0,0 +1,51 @@
+This is a patched version of zlib, modified to use
+Pentium-Pro-optimized assembly code in the deflation algorithm.  The
+files changed/added by this patch are:
+
+README.686
+match.S
+
+The speedup that this patch provides varies, depending on whether the
+compiler used to build the original version of zlib falls afoul of the
+PPro's speed traps.  My own tests show a speedup of around 10-20% at
+the default compression level, and 20-30% using -9, against a version
+compiled using gcc 2.7.2.3.  Your mileage may vary.
+
+Note that this code has been tailored for the PPro/PII in particular,
+and will not perform particularly well on a Pentium.
+
+If you are using an assembler other than GNU as, you will have to
+translate match.S to use your assembler's syntax.  (Have fun.)
+ +Brian Raiter +breadbox@muppetlabs.com +April, 1998 + + +Added for zlib 1.1.3: + +The patches come from +http://www.muppetlabs.com/~breadbox/software/assembly.html + +To compile zlib with this asm file, copy match.S to the zlib directory +then do: + +CFLAGS="-O3 -DASMV" ./configure +make OBJA=match.o + + +Update: + +I've been ignoring these assembly routines for years, believing that +gcc's generated code had caught up with it sometime around gcc 2.95 +and the major rearchitecting of the Pentium 4. However, I recently +learned that, despite what I believed, this code still has some life +in it. On the Pentium 4 and AMD64 chips, it continues to run about 8% +faster than the code produced by gcc 4.1. + +In acknowledgement of its continuing usefulness, I've altered the +license to match that of the rest of zlib. Share and Enjoy! + +Brian Raiter +breadbox@muppetlabs.com +April, 2007 diff --git a/lib/libz/contrib/asm686/match.S b/lib/libz/contrib/asm686/match.S new file mode 100644 index 0000000..06817e1 --- /dev/null +++ b/lib/libz/contrib/asm686/match.S @@ -0,0 +1,343 @@ +/* match.S -- x86 assembly version of the zlib longest_match() function. + * Optimized for the Intel 686 chips (PPro and later). + * + * Copyright (C) 1998, 2007 Brian Raiter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#ifndef NO_UNDERLINE +#define match_init _match_init +#define longest_match _longest_match +#endif + +#define MAX_MATCH (258) +#define MIN_MATCH (3) +#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) +#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) + +/* stack frame offsets */ + +#define chainlenwmask 0 /* high word: current chain len */ + /* low word: s->wmask */ +#define window 4 /* local copy of s->window */ +#define windowbestlen 8 /* s->window + bestlen */ +#define scanstart 16 /* first two bytes of string */ +#define scanend 12 /* last two bytes of string */ +#define scanalign 20 /* dword-misalignment of string */ +#define nicematch 24 /* a good enough match size */ +#define bestlen 28 /* size of best match so far */ +#define scan 32 /* ptr to string wanting match */ + +#define LocalVarsSize (36) +/* saved ebx 36 */ +/* saved edi 40 */ +/* saved esi 44 */ +/* saved ebp 48 */ +/* return address 52 */ +#define deflatestate 56 /* the function arguments */ +#define curmatch 60 + +/* All the +zlib1222add offsets are due to the addition of fields + * in zlib in the deflate_state structure since the asm code was first written + * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). + * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). + * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). 
+ */ + +#define zlib1222add (8) + +#define dsWSize (36+zlib1222add) +#define dsWMask (44+zlib1222add) +#define dsWindow (48+zlib1222add) +#define dsPrev (56+zlib1222add) +#define dsMatchLen (88+zlib1222add) +#define dsPrevMatch (92+zlib1222add) +#define dsStrStart (100+zlib1222add) +#define dsMatchStart (104+zlib1222add) +#define dsLookahead (108+zlib1222add) +#define dsPrevLen (112+zlib1222add) +#define dsMaxChainLen (116+zlib1222add) +#define dsGoodMatch (132+zlib1222add) +#define dsNiceMatch (136+zlib1222add) + + +.file "match.S" + +.globl match_init, longest_match + +.text + +/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ + +longest_match: + +/* Save registers that the compiler may be using, and adjust %esp to */ +/* make room for our stack frame. */ + + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $LocalVarsSize, %esp + +/* Retrieve the function arguments. %ecx will hold cur_match */ +/* throughout the entire function. %edx will hold the pointer to the */ +/* deflate_state structure during the function's setup (before */ +/* entering the main loop). */ + + movl deflatestate(%esp), %edx + movl curmatch(%esp), %ecx + +/* uInt wmask = s->w_mask; */ +/* unsigned chain_length = s->max_chain_length; */ +/* if (s->prev_length >= s->good_match) { */ +/* chain_length >>= 2; */ +/* } */ + + movl dsPrevLen(%edx), %eax + movl dsGoodMatch(%edx), %ebx + cmpl %ebx, %eax + movl dsWMask(%edx), %eax + movl dsMaxChainLen(%edx), %ebx + jl LastMatchGood + shrl $2, %ebx +LastMatchGood: + +/* chainlen is decremented once beforehand so that the function can */ +/* use the sign flag instead of the zero flag for the exit test. */ +/* It is then shifted into the high word, to make room for the wmask */ +/* value, which it will always accompany. */ + + decl %ebx + shll $16, %ebx + orl %eax, %ebx + movl %ebx, chainlenwmask(%esp) + +/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ + + movl dsNiceMatch(%edx), %eax + movl dsLookahead(%edx), %ebx + cmpl %eax, %ebx + jl LookaheadLess + movl %eax, %ebx +LookaheadLess: movl %ebx, nicematch(%esp) + +/* register Bytef *scan = s->window + s->strstart; */ + + movl dsWindow(%edx), %esi + movl %esi, window(%esp) + movl dsStrStart(%edx), %ebp + lea (%esi,%ebp), %edi + movl %edi, scan(%esp) + +/* Determine how many bytes the scan ptr is off from being */ +/* dword-aligned. */ + + movl %edi, %eax + negl %eax + andl $3, %eax + movl %eax, scanalign(%esp) + +/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ +/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ + + movl dsWSize(%edx), %eax + subl $MIN_LOOKAHEAD, %eax + subl %eax, %ebp + jg LimitPositive + xorl %ebp, %ebp +LimitPositive: + +/* int best_len = s->prev_length; */ + + movl dsPrevLen(%edx), %eax + movl %eax, bestlen(%esp) + +/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ + + addl %eax, %esi + movl %esi, windowbestlen(%esp) + +/* register ush scan_start = *(ushf*)scan; */ +/* register ush scan_end = *(ushf*)(scan+best_len-1); */ +/* Posf *prev = s->prev; */ + + movzwl (%edi), %ebx + movl %ebx, scanstart(%esp) + movzwl -1(%edi,%eax), %ebx + movl %ebx, scanend(%esp) + movl dsPrev(%edx), %edi + +/* Jump into the main loop. */ + + movl chainlenwmask(%esp), %edx + jmp LoopEntry + +.balign 16 + +/* do { + * match = s->window + cur_match; + * if (*(ushf*)(match+best_len-1) != scan_end || + * *(ushf*)match != scan_start) continue; + * [...] 
+ * } while ((cur_match = prev[cur_match & wmask]) > limit + * && --chain_length != 0); + * + * Here is the inner loop of the function. The function will spend the + * majority of its time in this loop, and majority of that time will + * be spent in the first ten instructions. + * + * Within this loop: + * %ebx = scanend + * %ecx = curmatch + * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) + * %esi = windowbestlen - i.e., (window + bestlen) + * %edi = prev + * %ebp = limit + */ +LookupLoop: + andl %edx, %ecx + movzwl (%edi,%ecx,2), %ecx + cmpl %ebp, %ecx + jbe LeaveNow + subl $0x00010000, %edx + js LeaveNow +LoopEntry: movzwl -1(%esi,%ecx), %eax + cmpl %ebx, %eax + jnz LookupLoop + movl window(%esp), %eax + movzwl (%eax,%ecx), %eax + cmpl scanstart(%esp), %eax + jnz LookupLoop + +/* Store the current value of chainlen. */ + + movl %edx, chainlenwmask(%esp) + +/* Point %edi to the string under scrutiny, and %esi to the string we */ +/* are hoping to match it up with. In actuality, %esi and %edi are */ +/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ +/* initialized to -(MAX_MATCH_8 - scanalign). */ + + movl window(%esp), %esi + movl scan(%esp), %edi + addl %ecx, %esi + movl scanalign(%esp), %eax + movl $(-MAX_MATCH_8), %edx + lea MAX_MATCH_8(%edi,%eax), %edi + lea MAX_MATCH_8(%esi,%eax), %esi + +/* Test the strings for equality, 8 bytes at a time. At the end, + * adjust %edx so that it is offset to the exact byte that mismatched. + * + * We already know at this point that the first three bytes of the + * strings match each other, and they can be safely passed over before + * starting the compare loop. So what this code does is skip over 0-3 + * bytes, as much as necessary in order to dword-align the %edi + * pointer. (%esi will still be misaligned three times out of four.) + * + * It should be confessed that this loop usually does not represent + * much of the total running time. Replacing it with a more + * straightforward "rep cmpsb" would not drastically degrade + * performance. + */ +LoopCmps: + movl (%esi,%edx), %eax + xorl (%edi,%edx), %eax + jnz LeaveLoopCmps + movl 4(%esi,%edx), %eax + xorl 4(%edi,%edx), %eax + jnz LeaveLoopCmps4 + addl $8, %edx + jnz LoopCmps + jmp LenMaximum +LeaveLoopCmps4: addl $4, %edx +LeaveLoopCmps: testl $0x0000FFFF, %eax + jnz LenLower + addl $2, %edx + shrl $16, %eax +LenLower: subb $1, %al + adcl $0, %edx + +/* Calculate the length of the match. If it is longer than MAX_MATCH, */ +/* then automatically accept it as the best possible match and leave. */ + + lea (%edi,%edx), %eax + movl scan(%esp), %edi + subl %edi, %eax + cmpl $MAX_MATCH, %eax + jge LenMaximum + +/* If the length of the match is not longer than the best match we */ +/* have so far, then forget it and return to the lookup loop. 
*/ + + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + cmpl %ebx, %eax + jg LongerMatch + movl windowbestlen(%esp), %esi + movl dsPrev(%edx), %edi + movl scanend(%esp), %ebx + movl chainlenwmask(%esp), %edx + jmp LookupLoop + +/* s->match_start = cur_match; */ +/* best_len = len; */ +/* if (len >= nice_match) break; */ +/* scan_end = *(ushf*)(scan+best_len-1); */ + +LongerMatch: movl nicematch(%esp), %ebx + movl %eax, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + cmpl %ebx, %eax + jge LeaveNow + movl window(%esp), %esi + addl %eax, %esi + movl %esi, windowbestlen(%esp) + movzwl -1(%edi,%eax), %ebx + movl dsPrev(%edx), %edi + movl %ebx, scanend(%esp) + movl chainlenwmask(%esp), %edx + jmp LookupLoop + +/* Accept the current string, with the maximum possible length. */ + +LenMaximum: movl deflatestate(%esp), %edx + movl $MAX_MATCH, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + +/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ +/* return s->lookahead; */ + +LeaveNow: + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + movl dsLookahead(%edx), %eax + cmpl %eax, %ebx + jg LookaheadRet + movl %ebx, %eax +LookaheadRet: + +/* Restore the stack and return from whence we came. */ + + addl $LocalVarsSize, %esp + popl %ebx + popl %esi + popl %edi + popl %ebp +match_init: ret diff --git a/lib/libz/contrib/gcc_gvmat64/gvmat64.S b/lib/libz/contrib/gcc_gvmat64/gvmat64.S new file mode 100644 index 0000000..23309fa --- /dev/null +++ b/lib/libz/contrib/gcc_gvmat64/gvmat64.S @@ -0,0 +1,574 @@ +/* +;uInt longest_match_x64( +; deflate_state *s, +; IPos cur_match); // current match + +; gvmat64.S -- Asm portion of the optimized longest_match for 32 bits x86_64 +; (AMD64 on Athlon 64, Opteron, Phenom +; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7) +; this file is translation from gvmat64.asm to GCC 4.x (for Linux, Mac XCode) +; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant. +; +; File written by Gilles Vollant, by converting to assembly the longest_match +; from Jean-loup Gailly in deflate.c of zLib and infoZip zip. +; and by taking inspiration on asm686 with masm, optimised assembly code +; from Brian Raiter, written 1998 +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software +; 3. This notice may not be removed or altered from any source distribution. 
+; +; http://www.zlib.net +; http://www.winimage.com/zLibDll +; http://www.muppetlabs.com/~breadbox/software/assembly.html +; +; to compile this file for zLib, I use option: +; gcc -c -arch x86_64 gvmat64.S + + +;uInt longest_match(s, cur_match) +; deflate_state *s; +; IPos cur_match; // current match / +; +; with XCode for Mac, I had strange error with some jump on intel syntax +; this is why BEFORE_JMP and AFTER_JMP are used + */ + + +#define BEFORE_JMP .att_syntax +#define AFTER_JMP .intel_syntax noprefix + +#ifndef NO_UNDERLINE +# define match_init _match_init +# define longest_match _longest_match +#endif + +.intel_syntax noprefix + +.globl match_init, longest_match +.text +longest_match: + + + +#define LocalVarsSize 96 +/* +; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12 +; free register : r14,r15 +; register can be saved : rsp +*/ + +#define chainlenwmask (rsp + 8 - LocalVarsSize) +#define nicematch (rsp + 16 - LocalVarsSize) + +#define save_rdi (rsp + 24 - LocalVarsSize) +#define save_rsi (rsp + 32 - LocalVarsSize) +#define save_rbx (rsp + 40 - LocalVarsSize) +#define save_rbp (rsp + 48 - LocalVarsSize) +#define save_r12 (rsp + 56 - LocalVarsSize) +#define save_r13 (rsp + 64 - LocalVarsSize) +#define save_r14 (rsp + 72 - LocalVarsSize) +#define save_r15 (rsp + 80 - LocalVarsSize) + + +/* +; all the +4 offsets are due to the addition of pending_buf_size (in zlib +; in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, remove the +4). +; Note : these value are good with a 8 bytes boundary pack structure +*/ + +#define MAX_MATCH 258 +#define MIN_MATCH 3 +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) + +/* +;;; Offsets for fields in the deflate_state structure. These numbers +;;; are calculated from the definition of deflate_state, with the +;;; assumption that the compiler will dword-align the fields. (Thus, +;;; changing the definition of deflate_state could easily cause this +;;; program to crash horribly, without so much as a warning at +;;; compile time. Sigh.) + +; all the +zlib1222add offsets are due to the addition of fields +; in zlib in the deflate_state structure since the asm code was first written +; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). +; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). +; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). 
+*/ + + + +/* you can check the structure offset by running + +#include +#include +#include "deflate.h" + +void print_depl() +{ +deflate_state ds; +deflate_state *s=&ds; +printf("size pointer=%u\n",(int)sizeof(void*)); + +printf("#define dsWSize %u\n",(int)(((char*)&(s->w_size))-((char*)s))); +printf("#define dsWMask %u\n",(int)(((char*)&(s->w_mask))-((char*)s))); +printf("#define dsWindow %u\n",(int)(((char*)&(s->window))-((char*)s))); +printf("#define dsPrev %u\n",(int)(((char*)&(s->prev))-((char*)s))); +printf("#define dsMatchLen %u\n",(int)(((char*)&(s->match_length))-((char*)s))); +printf("#define dsPrevMatch %u\n",(int)(((char*)&(s->prev_match))-((char*)s))); +printf("#define dsStrStart %u\n",(int)(((char*)&(s->strstart))-((char*)s))); +printf("#define dsMatchStart %u\n",(int)(((char*)&(s->match_start))-((char*)s))); +printf("#define dsLookahead %u\n",(int)(((char*)&(s->lookahead))-((char*)s))); +printf("#define dsPrevLen %u\n",(int)(((char*)&(s->prev_length))-((char*)s))); +printf("#define dsMaxChainLen %u\n",(int)(((char*)&(s->max_chain_length))-((char*)s))); +printf("#define dsGoodMatch %u\n",(int)(((char*)&(s->good_match))-((char*)s))); +printf("#define dsNiceMatch %u\n",(int)(((char*)&(s->nice_match))-((char*)s))); +} +*/ + +#define dsWSize 68 +#define dsWMask 76 +#define dsWindow 80 +#define dsPrev 96 +#define dsMatchLen 144 +#define dsPrevMatch 148 +#define dsStrStart 156 +#define dsMatchStart 160 +#define dsLookahead 164 +#define dsPrevLen 168 +#define dsMaxChainLen 172 +#define dsGoodMatch 188 +#define dsNiceMatch 192 + +#define window_size [ rcx + dsWSize] +#define WMask [ rcx + dsWMask] +#define window_ad [ rcx + dsWindow] +#define prev_ad [ rcx + dsPrev] +#define strstart [ rcx + dsStrStart] +#define match_start [ rcx + dsMatchStart] +#define Lookahead [ rcx + dsLookahead] //; 0ffffffffh on infozip +#define prev_length [ rcx + dsPrevLen] +#define max_chain_length [ rcx + dsMaxChainLen] +#define good_match [ rcx + dsGoodMatch] +#define nice_match [ rcx + dsNiceMatch] + +/* +; windows: +; parameter 1 in rcx(deflate state s), param 2 in rdx (cur match) + +; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and +; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp +; +; All registers must be preserved across the call, except for +; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch. + +; +; gcc on macosx-linux: +; see http://www.x86-64.org/documentation/abi-0.99.pdf +; param 1 in rdi, param 2 in rsi +; rbx, rsp, rbp, r12 to r15 must be preserved + +;;; Save registers that the compiler may be using, and adjust esp to +;;; make room for our stack frame. + + +;;; Retrieve the function arguments. r8d will hold cur_match +;;; throughout the entire function. edx will hold the pointer to the +;;; deflate_state structure during the function's setup (before +;;; entering the main loop. 
+ +; ms: parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match) +; mac: param 1 in rdi, param 2 rsi +; this clear high 32 bits of r8, which can be garbage in both r8 and rdx +*/ + mov [save_rbx],rbx + mov [save_rbp],rbp + + + mov rcx,rdi + + mov r8d,esi + + + mov [save_r12],r12 + mov [save_r13],r13 + mov [save_r14],r14 + mov [save_r15],r15 + + +//;;; uInt wmask = s->w_mask; +//;;; unsigned chain_length = s->max_chain_length; +//;;; if (s->prev_length >= s->good_match) { +//;;; chain_length >>= 2; +//;;; } + + + mov edi, prev_length + mov esi, good_match + mov eax, WMask + mov ebx, max_chain_length + cmp edi, esi + jl LastMatchGood + shr ebx, 2 +LastMatchGood: + +//;;; chainlen is decremented once beforehand so that the function can +//;;; use the sign flag instead of the zero flag for the exit test. +//;;; It is then shifted into the high word, to make room for the wmask +//;;; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + +//;;; on zlib only +//;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + + + mov eax, nice_match + mov [chainlenwmask], ebx + mov r10d, Lookahead + cmp r10d, eax + cmovnl r10d, eax + mov [nicematch],r10d + + + +//;;; register Bytef *scan = s->window + s->strstart; + mov r10, window_ad + mov ebp, strstart + lea r13, [r10 + rbp] + +//;;; Determine how many bytes the scan ptr is off from being +//;;; dword-aligned. + + mov r9,r13 + neg r13 + and r13,3 + +//;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? +//;;; s->strstart - (IPos)MAX_DIST(s) : NIL; + + + mov eax, window_size + sub eax, MIN_LOOKAHEAD + + + xor edi,edi + sub ebp, eax + + mov r11d, prev_length + + cmovng ebp,edi + +//;;; int best_len = s->prev_length; + + +//;;; Store the sum of s->window + best_len in esi locally, and in esi. + + lea rsi,[r10+r11] + +//;;; register ush scan_start = *(ushf*)scan; +//;;; register ush scan_end = *(ushf*)(scan+best_len-1); +//;;; Posf *prev = s->prev; + + movzx r12d,word ptr [r9] + movzx ebx, word ptr [r9 + r11 - 1] + + mov rdi, prev_ad + +//;;; Jump into the main loop. + + mov edx, [chainlenwmask] + + cmp bx,word ptr [rsi + r8 - 1] + jz LookupLoopIsZero + + + +LookupLoop1: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + jbe LeaveNow + + + + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry1: + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jz LookupLoopIsZero + AFTER_JMP + +LookupLoop2: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + BEFORE_JMP + jbe LeaveNow + AFTER_JMP + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry2: + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jz LookupLoopIsZero + AFTER_JMP + +LookupLoop4: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + BEFORE_JMP + jbe LeaveNow + AFTER_JMP + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry4: + + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jnz LookupLoop1 + jmp LookupLoopIsZero + AFTER_JMP +/* +;;; do { +;;; match = s->window + cur_match; +;;; if (*(ushf*)(match+best_len-1) != scan_end || +;;; *(ushf*)match != scan_start) continue; +;;; [...] +;;; } while ((cur_match = prev[cur_match & wmask]) > limit +;;; && --chain_length != 0); +;;; +;;; Here is the inner loop of the function. The function will spend the +;;; majority of its time in this loop, and majority of that time will +;;; be spent in the first ten instructions. 
+;;; +;;; Within this loop: +;;; ebx = scanend +;;; r8d = curmatch +;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +;;; esi = windowbestlen - i.e., (window + bestlen) +;;; edi = prev +;;; ebp = limit +*/ +.balign 16 +LookupLoop: + and r8d, edx + + movzx r8d, word ptr [rdi + r8*2] + cmp r8d, ebp + BEFORE_JMP + jbe LeaveNow + AFTER_JMP + sub edx, 0x00010000 + BEFORE_JMP + js LeaveNow + AFTER_JMP + +LoopEntry: + + cmp bx,word ptr [rsi + r8 - 1] + BEFORE_JMP + jnz LookupLoop1 + AFTER_JMP +LookupLoopIsZero: + cmp r12w, word ptr [r10 + r8] + BEFORE_JMP + jnz LookupLoop1 + AFTER_JMP + + +//;;; Store the current value of chainlen. + mov [chainlenwmask], edx +/* +;;; Point edi to the string under scrutiny, and esi to the string we +;;; are hoping to match it up with. In actuality, esi and edi are +;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is +;;; initialized to -(MAX_MATCH_8 - scanalign). +*/ + lea rsi,[r8+r10] + mov rdx, 0xfffffffffffffef8 //; -(MAX_MATCH_8) + lea rsi, [rsi + r13 + 0x0108] //;MAX_MATCH_8] + lea rdi, [r9 + r13 + 0x0108] //;MAX_MATCH_8] + + prefetcht1 [rsi+rdx] + prefetcht1 [rdi+rdx] + +/* +;;; Test the strings for equality, 8 bytes at a time. At the end, +;;; adjust rdx so that it is offset to the exact byte that mismatched. +;;; +;;; We already know at this point that the first three bytes of the +;;; strings match each other, and they can be safely passed over before +;;; starting the compare loop. So what this code does is skip over 0-3 +;;; bytes, as much as necessary in order to dword-align the edi +;;; pointer. (rsi will still be misaligned three times out of four.) +;;; +;;; It should be confessed that this loop usually does not represent +;;; much of the total running time. Replacing it with a more +;;; straightforward "rep cmpsb" would not drastically degrade +;;; performance. +*/ + +LoopCmps: + mov rax, [rsi + rdx] + xor rax, [rdi + rdx] + jnz LeaveLoopCmps + + mov rax, [rsi + rdx + 8] + xor rax, [rdi + rdx + 8] + jnz LeaveLoopCmps8 + + + mov rax, [rsi + rdx + 8+8] + xor rax, [rdi + rdx + 8+8] + jnz LeaveLoopCmps16 + + add rdx,8+8+8 + + BEFORE_JMP + jnz LoopCmps + jmp LenMaximum + AFTER_JMP + +LeaveLoopCmps16: add rdx,8 +LeaveLoopCmps8: add rdx,8 +LeaveLoopCmps: + + test eax, 0x0000FFFF + jnz LenLower + + test eax,0xffffffff + + jnz LenLower32 + + add rdx,4 + shr rax,32 + or ax,ax + BEFORE_JMP + jnz LenLower + AFTER_JMP + +LenLower32: + shr eax,16 + add rdx,2 + +LenLower: + sub al, 1 + adc rdx, 0 +//;;; Calculate the length of the match. If it is longer than MAX_MATCH, +//;;; then automatically accept it as the best possible match and leave. + + lea rax, [rdi + rdx] + sub rax, r9 + cmp eax, MAX_MATCH + BEFORE_JMP + jge LenMaximum + AFTER_JMP +/* +;;; If the length of the match is not longer than the best match we +;;; have so far, then forget it and return to the lookup loop. +;/////////////////////////////////// +*/ + cmp eax, r11d + jg LongerMatch + + lea rsi,[r10+r11] + + mov rdi, prev_ad + mov edx, [chainlenwmask] + BEFORE_JMP + jmp LookupLoop + AFTER_JMP +/* +;;; s->match_start = cur_match; +;;; best_len = len; +;;; if (len >= nice_match) break; +;;; scan_end = *(ushf*)(scan+best_len-1); +*/ +LongerMatch: + mov r11d, eax + mov match_start, r8d + cmp eax, [nicematch] + BEFORE_JMP + jge LeaveNow + AFTER_JMP + + lea rsi,[r10+rax] + + movzx ebx, word ptr [r9 + rax - 1] + mov rdi, prev_ad + mov edx, [chainlenwmask] + BEFORE_JMP + jmp LookupLoop + AFTER_JMP + +//;;; Accept the current string, with the maximum possible length. 
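+//;;; A MAX_MATCH-byte match cannot be improved upon, so record cur_match
+//;;; as match_start and fall through to LeaveNow, which clamps the
+//;;; returned length to s->lookahead.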
+ +LenMaximum: + mov r11d,MAX_MATCH + mov match_start, r8d + +//;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; +//;;; return s->lookahead; + +LeaveNow: + mov eax, Lookahead + cmp r11d, eax + cmovng eax, r11d + + + +//;;; Restore the stack and return from whence we came. + + +// mov rsi,[save_rsi] +// mov rdi,[save_rdi] + mov rbx,[save_rbx] + mov rbp,[save_rbp] + mov r12,[save_r12] + mov r13,[save_r13] + mov r14,[save_r14] + mov r15,[save_r15] + + + ret 0 +//; please don't remove this string ! +//; Your can freely use gvmat64 in any free or commercial app +//; but it is far better don't remove the string in the binary! + // db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0 + + +match_init: + ret 0 + + -- cgit v1.1 From 8b86799ca0b1a9b2f49e8d4d736adf1c101baac0 Mon Sep 17 00:00:00 2001 From: des Date: Wed, 21 Apr 2010 06:33:10 +0000 Subject: MFH OpenSSH 5.4p1 --- lib/libpam/modules/pam_ssh/Makefile | 3 +++ lib/libpam/modules/pam_ssh/pam_ssh.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/libpam/modules/pam_ssh/Makefile b/lib/libpam/modules/pam_ssh/Makefile index f7dcd0c..b638d8b 100644 --- a/lib/libpam/modules/pam_ssh/Makefile +++ b/lib/libpam/modules/pam_ssh/Makefile @@ -7,6 +7,9 @@ LIB= pam_ssh MAN= pam_ssh.8 SRCS= pam_ssh.c +# required when linking with a dynamic libssh +SRCS+= roaming_dummy.c + WARNS?= 0 CFLAGS+= -I${SSHDIR} -include ssh_namespace.h diff --git a/lib/libpam/modules/pam_ssh/pam_ssh.c b/lib/libpam/modules/pam_ssh/pam_ssh.c index 25c63ca..9d89045 100644 --- a/lib/libpam/modules/pam_ssh/pam_ssh.c +++ b/lib/libpam/modules/pam_ssh/pam_ssh.c @@ -61,6 +61,9 @@ __FBSDID("$FreeBSD$"); #include "authfd.h" #include "authfile.h" +#define ssh_add_identity(auth, key, comment) \ + ssh_add_identity_constrained(auth, key, comment, 0, 0) + extern char **environ; struct pam_ssh_key { -- cgit v1.1 From d17c958e27ca3e5193ad3c1013bec5ef876c7b18 Mon Sep 17 00:00:00 2001 From: kib Date: Wed, 21 Apr 2010 11:09:13 +0000 Subject: MFC r206549: Align the declaration for sa_sigaction with POSIX. MFC r206649: Still reference struct __sigaction with clarification when this form of argument declaration is needed. MFC r206802: Revert r206649. Simplify the presented declaration of struct sigaction. --- lib/libc/sys/sigaction.2 | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/sigaction.2 b/lib/libc/sys/sigaction.2 index 4c4f18f..3b4f2f0 100644 --- a/lib/libc/sys/sigaction.2 +++ b/lib/libc/sys/sigaction.2 @@ -28,7 +28,7 @@ .\" From: @(#)sigaction.2 8.2 (Berkeley) 4/3/94 .\" $FreeBSD$ .\" -.Dd June 7, 2004 +.Dd April 18, 2010 .Dt SIGACTION 2 .Os .Sh NAME @@ -40,16 +40,11 @@ .In signal.h .Bd -literal struct sigaction { - union { - void (*__sa_handler)(int); - void (*__sa_sigaction)(int, struct __siginfo *, void *); - } __sigaction_u; /* signal handler */ + void (*sa_handler)(int); + void (*sa_sigaction)(int, siginfo_t *, void *); int sa_flags; /* see signal options below */ sigset_t sa_mask; /* signal mask to apply */ }; - -#define sa_handler __sigaction_u.__sa_handler -#define sa_sigaction __sigaction_u.__sa_sigaction .Ed .Ft int .Fo sigaction @@ -148,6 +143,16 @@ If is non-zero, the previous handling information for the signal is returned to the user. .Pp +The above declaration of +.Vt "struct sigaction" +is not literal. +It is provided only to list the accessible members. 
+See +.In sys/signal.h +for the actual definition. +In particular, the storage occupied by sa_handler and sa_sigaction overlaps, +and an application can not use both simultaneously. +.Pp Once a signal handler is installed, it normally remains installed until another .Fn sigaction -- cgit v1.1 From 62840af138e26bcfd4e5b81e04a67f22560448d2 Mon Sep 17 00:00:00 2001 From: jilles Date: Fri, 23 Apr 2010 18:01:19 +0000 Subject: MFC r206711: fnmatch: Fix bad FNM_PERIOD disabling if an asterisk has been seen. Example: fnmatch("a*b/*", "abbb/.x", FNM_PATHNAME | FNM_PERIOD) PR: 116074 --- lib/libc/gen/fnmatch.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/fnmatch.c b/lib/libc/gen/fnmatch.c index e697bdc..3acbc3b 100644 --- a/lib/libc/gen/fnmatch.c +++ b/lib/libc/gen/fnmatch.c @@ -67,7 +67,8 @@ __FBSDID("$FreeBSD$"); #define RANGE_ERROR (-1) static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); -static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t); +static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, + mbstate_t); int fnmatch(pattern, string, flags) @@ -76,22 +77,21 @@ fnmatch(pattern, string, flags) { static const mbstate_t initial; - return (fnmatch1(pattern, string, flags, initial, initial)); + return (fnmatch1(pattern, string, string, flags, initial, initial)); } static int -fnmatch1(pattern, string, flags, patmbs, strmbs) - const char *pattern, *string; +fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs) + const char *pattern, *string, *stringstart; int flags; mbstate_t patmbs, strmbs; { - const char *stringstart; char *newp; char c; wchar_t pc, sc; size_t pclen, sclen; - for (stringstart = string;;) { + for (;;) { pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); if (pclen == (size_t)-1 || pclen == (size_t)-2) return (FNM_NOMATCH); @@ -145,8 +145,8 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) /* General case, use recursion. */ while (sc != EOS) { - if (!fnmatch1(pattern, string, - flags & ~FNM_PERIOD, patmbs, strmbs)) + if (!fnmatch1(pattern, string, stringstart, + flags, patmbs, strmbs)) return (0); sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); -- cgit v1.1 From 49c4a03fe87df8c3c772e1ab83bfeafc541041b9 Mon Sep 17 00:00:00 2001 From: jilles Date: Sun, 2 May 2010 12:38:59 +0000 Subject: MFC r207187: kvm(3): Mention that some of the functions use sysctl(3) instead of kmem. Additionally, because of sysctl(3) use (which is generally good), behaviour for crash dumps differs slightly from behaviour for live kernels and this will probably never be fixed entirely, so weaken that claim. --- lib/libkvm/kvm.3 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libkvm/kvm.3 b/lib/libkvm/kvm.3 index f694fd8..9dcd772 100644 --- a/lib/libkvm/kvm.3 +++ b/lib/libkvm/kvm.3 @@ -32,7 +32,7 @@ .\" @(#)kvm.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd January 29, 2004 +.Dd April 25, 2010 .Dt KVM 3 .Os .Sh NAME @@ -46,12 +46,15 @@ The library provides a uniform interface for accessing kernel virtual memory images, including live systems and crash dumps. Access to live systems is via +.Xr sysctl 3 +for some functions, and .Xr mem 4 and .Xr kmem 4 +for other functions, while crash dumps can be examined via the core file generated by .Xr savecore 8 . -The interface behaves identically in both cases. +The interface behaves similarly in both cases. 
Memory can be read and written, kernel symbol addresses can be looked up efficiently, and information about user processes can be gathered. @@ -112,5 +115,6 @@ given descriptor. .Xr kvm_openfiles 3 , .Xr kvm_read 3 , .Xr kvm_write 3 , +.Xr sysctl 3 , .Xr kmem 4 , .Xr mem 4 -- cgit v1.1 From 3b1de8da0b13e8f66e54d5f7aecaf1b9322dd000 Mon Sep 17 00:00:00 2001 From: marius Date: Sun, 2 May 2010 16:52:23 +0000 Subject: MFC: r206490, r206492 While SPARC V9 allows tininess to be detected either before or after rounding (impl. dep. #55), the SPARC JPS1 responsible for SPARC64 and UltraSPARC processors defines that in all cases tininess is detected before rounding therefore rounding up to the smallest normalized number should set the underflow flag. This change is needed for using SoftFloat on sparc64 for reference purposes. PR: 144900 Submitted by: Peter Jeremy --- lib/libc/softfloat/softfloat-specialize | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/libc/softfloat/softfloat-specialize b/lib/libc/softfloat/softfloat-specialize index c8c8028..e8585ce 100644 --- a/lib/libc/softfloat/softfloat-specialize +++ b/lib/libc/softfloat/softfloat-specialize @@ -44,7 +44,11 @@ Underflow tininess-detection mode, statically initialized to default value. #ifdef SOFTFLOAT_FOR_GCC static #endif +#ifdef __sparc64__ +int8 float_detect_tininess = float_tininess_before_rounding; +#else int8 float_detect_tininess = float_tininess_after_rounding; +#endif /* ------------------------------------------------------------------------------- -- cgit v1.1 From 45003aa79a1fb655b26b0f19031c16082a25c4bc Mon Sep 17 00:00:00 2001 From: kib Date: Tue, 4 May 2010 05:34:18 +0000 Subject: MFC r206893: Slightly modernize realpath(3). SUSv4 requires that implementation returns EINVAL if supplied path is NULL, and ENOENT if path is empty string [1]. Bring prototype in conformance with SUSv4, adding restrict keywords. Allow the resolved path buffer pointer be NULL, in which case realpath(3) allocates storage with malloc(). MFC r206898: Free() is not allowed to modify errno, remove safety brackets around it. Add small optimization, do not copy a string to the buffer that is to be freed immediately after. MFC r206997: Move realpath(3) prototype to a POSIX section. MFC r206998: Add standards section, improve wording, taking into account the handling of NULL and changed type in declaration. --- lib/libc/stdlib/realpath.3 | 25 +++++++++++++++++++----- lib/libc/stdlib/realpath.c | 48 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdlib/realpath.3 b/lib/libc/stdlib/realpath.3 index 4205a3d..aee65f7 100644 --- a/lib/libc/stdlib/realpath.3 +++ b/lib/libc/stdlib/realpath.3 @@ -31,7 +31,7 @@ .\" @(#)realpath.3 8.2 (Berkeley) 2/16/94 .\" $FreeBSD$ .\" -.Dd February 16, 1994 +.Dd April 19, 2010 .Dt REALPATH 3 .Os .Sh NAME @@ -43,7 +43,7 @@ .In sys/param.h .In stdlib.h .Ft "char *" -.Fn realpath "const char *pathname" "char resolved_path[PATH_MAX]" +.Fn realpath "const char *pathname" "char *resolved_path" .Sh DESCRIPTION The .Fn realpath @@ -64,7 +64,8 @@ argument .Em must refer to a buffer capable of storing at least .Dv PATH_MAX -characters. +characters, or be +.Dv NULL . .Pp The .Fn realpath @@ -82,13 +83,22 @@ The function returns .Fa resolved_path on success. 
+If the function was supplied +.Dv NULL +as +.Fa resolved_path , +and operation did not cause errors, the returned value is +a null-terminated string in a buffer allocated by a call to +.Fn malloc 3 . If an error occurs, .Fn realpath returns .Dv NULL , -and +and if .Fa resolved_path -contains the pathname which caused the problem. +is not +.Dv NULL , +the array that it points to contains the pathname which caused the problem. .Sh ERRORS The function .Fn realpath @@ -113,6 +123,11 @@ when given a relative .Fa pathname . .Sh "SEE ALSO" .Xr getcwd 3 +.Sh STANDARDS +The +.Fn realpath +function conforms to +.St -p1003.1-2001 . .Sh HISTORY The .Fn realpath diff --git a/lib/libc/stdlib/realpath.c b/lib/libc/stdlib/realpath.c index 3082f5f..e75ee4a 100644 --- a/lib/libc/stdlib/realpath.c +++ b/lib/libc/stdlib/realpath.c @@ -43,23 +43,37 @@ __FBSDID("$FreeBSD$"); #include "un-namespace.h" /* - * char *realpath(const char *path, char resolved[PATH_MAX]); - * * Find the real name of path, by removing all ".", ".." and symlink * components. Returns (resolved) on success, or (NULL) on failure, * in which case the path which caused trouble is left in (resolved). */ char * -realpath(const char *path, char resolved[PATH_MAX]) +realpath(const char * __restrict path, char * __restrict resolved) { struct stat sb; char *p, *q, *s; size_t left_len, resolved_len; unsigned symlinks; - int serrno, slen; + int serrno, slen, m; char left[PATH_MAX], next_token[PATH_MAX], symlink[PATH_MAX]; + if (path == NULL) { + errno = EINVAL; + return (NULL); + } + if (path[0] == '\0') { + errno = ENOENT; + return (NULL); + } serrno = errno; + if (resolved == NULL) { + resolved = malloc(PATH_MAX); + if (resolved == NULL) + return (NULL); + m = 1; + } else + m = 0; + symlinks = 0; if (path[0] == '/') { resolved[0] = '/'; @@ -70,13 +84,18 @@ realpath(const char *path, char resolved[PATH_MAX]) left_len = strlcpy(left, path + 1, sizeof(left)); } else { if (getcwd(resolved, PATH_MAX) == NULL) { - strlcpy(resolved, ".", PATH_MAX); + if (m) + free(resolved); + else + strlcpy(resolved, ".", PATH_MAX); return (NULL); } resolved_len = strlen(resolved); left_len = strlcpy(left, path, sizeof(left)); } if (left_len >= sizeof(left) || resolved_len >= PATH_MAX) { + if (m) + free(resolved); errno = ENAMETOOLONG; return (NULL); } @@ -92,6 +111,8 @@ realpath(const char *path, char resolved[PATH_MAX]) p = strchr(left, '/'); s = p ? 
p : left + left_len; if (s - left >= sizeof(next_token)) { + if (m) + free(resolved); errno = ENAMETOOLONG; return (NULL); } @@ -102,6 +123,8 @@ realpath(const char *path, char resolved[PATH_MAX]) memmove(left, s + 1, left_len + 1); if (resolved[resolved_len - 1] != '/') { if (resolved_len + 1 >= PATH_MAX) { + if (m) + free(resolved); errno = ENAMETOOLONG; return (NULL); } @@ -133,6 +156,8 @@ realpath(const char *path, char resolved[PATH_MAX]) */ resolved_len = strlcat(resolved, next_token, PATH_MAX); if (resolved_len >= PATH_MAX) { + if (m) + free(resolved); errno = ENAMETOOLONG; return (NULL); } @@ -141,16 +166,23 @@ realpath(const char *path, char resolved[PATH_MAX]) errno = serrno; return (resolved); } + if (m) + free(resolved); return (NULL); } if (S_ISLNK(sb.st_mode)) { if (symlinks++ > MAXSYMLINKS) { + if (m) + free(resolved); errno = ELOOP; return (NULL); } slen = readlink(resolved, symlink, sizeof(symlink) - 1); - if (slen < 0) + if (slen < 0) { + if (m) + free(resolved); return (NULL); + } symlink[slen] = '\0'; if (symlink[0] == '/') { resolved[1] = 0; @@ -171,6 +203,8 @@ realpath(const char *path, char resolved[PATH_MAX]) if (p != NULL) { if (symlink[slen - 1] != '/') { if (slen + 1 >= sizeof(symlink)) { + if (m) + free(resolved); errno = ENAMETOOLONG; return (NULL); } @@ -179,6 +213,8 @@ realpath(const char *path, char resolved[PATH_MAX]) } left_len = strlcat(symlink, left, sizeof(left)); if (left_len >= sizeof(left)) { + if (m) + free(resolved); errno = ENAMETOOLONG; return (NULL); } -- cgit v1.1 From 4925aa8c65bf2657c27d8d1086b5c67b8637c949 Mon Sep 17 00:00:00 2001 From: kib Date: Wed, 5 May 2010 09:29:34 +0000 Subject: MFC r207009: C language does not has references, it provides pointers. --- lib/libc/stdlib/realpath.3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdlib/realpath.3 b/lib/libc/stdlib/realpath.3 index aee65f7..166bb12 100644 --- a/lib/libc/stdlib/realpath.3 +++ b/lib/libc/stdlib/realpath.3 @@ -56,13 +56,13 @@ and in .Fa pathname , and copies the resulting absolute pathname into -the memory referenced by +the memory pointed to by .Fa resolved_path . The .Fa resolved_path argument .Em must -refer to a buffer capable of storing at least +point to a buffer capable of storing at least .Dv PATH_MAX characters, or be .Dv NULL . -- cgit v1.1 From 91722e5e54cdcd3b5c4ee49118ba83663b9bd144 Mon Sep 17 00:00:00 2001 From: jilles Date: Wed, 5 May 2010 22:00:57 +0000 Subject: MFC r207186: sysctl(3): Update description of various kern.* variables. Also add xrefs for confstr(3) (as sysconf(3) but for strings) and kvm(3) (which is a more convenient way to access some of the variables). PR: 116480 --- lib/libc/gen/sysctl.3 | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/sysctl.3 b/lib/libc/gen/sysctl.3 index 75f8668..77e7d92 100644 --- a/lib/libc/gen/sysctl.3 +++ b/lib/libc/gen/sysctl.3 @@ -28,7 +28,7 @@ .\" @(#)sysctl.3 8.4 (Berkeley) 5/9/95 .\" $FreeBSD$ .\" -.Dd January 28, 2009 +.Dd April 25, 2010 .Dt SYSCTL 3 .Os .Sh NAME @@ -326,7 +326,7 @@ information. .It "KERN_BOOTFILE string yes" .It "KERN_BOOTTIME struct timeval no" .It "KERN_CLOCKRATE struct clockinfo no" -.It "KERN_FILE struct file no" +.It "KERN_FILE struct xfile no" .It "KERN_HOSTID integer yes" .It "KERN_HOSTUUID string yes" .It "KERN_HOSTNAME string yes" @@ -343,14 +343,14 @@ information. 
.It "KERN_OSREV integer no" .It "KERN_OSTYPE string no" .It "KERN_POSIX1 integer no" -.It "KERN_PROC struct proc no" +.It "KERN_PROC node not applicable" .It "KERN_PROF node not applicable" .It "KERN_QUANTUM integer yes" .It "KERN_SAVED_IDS integer no" .It "KERN_SECURELVL integer raise only" .It "KERN_UPDATEINTERVAL integer no" .It "KERN_VERSION string no" -.It "KERN_VNODE struct vnode no" +.It "KERN_VNODE struct xvnode no" .El .Pp .Bl -tag -width 6n @@ -372,10 +372,8 @@ This structure contains the clock, statistics clock and profiling clock frequencies, the number of micro-seconds per hz tick and the skew rate. .It Li KERN_FILE Return the entire file table. -The returned data consists of a single -.Va struct filehead -followed by an array of -.Va struct file , +The returned data consists of an array of +.Va struct xfile , whose size depends on the current number of such objects in the system. .It Li KERN_HOSTID Get or set the host ID. @@ -527,10 +525,8 @@ Note, the vnode table is not necessarily a consistent snapshot of the system. The returned data consists of an array whose size depends on the current number of such objects in the system. -Each element of the array contains the kernel address of a vnode -.Va struct vnode * -followed by the vnode itself -.Va struct vnode . +Each element of the array consists of a +.Va struct xvnode . .El .Ss CTL_NET The string and integer information available for the CTL_NET level @@ -859,6 +855,8 @@ An attempt is made to set a read-only value. A process without appropriate privilege attempts to set a value. .El .Sh SEE ALSO +.Xr confstr 3 , +.Xr kvm 3 , .Xr sysconf 3 , .Xr sysctl 8 .Sh HISTORY -- cgit v1.1 From dced090d8c1e52f4b2beeccd22da467066a3f03b Mon Sep 17 00:00:00 2001 From: jilles Date: Wed, 5 May 2010 22:12:56 +0000 Subject: MFC r207190: unlinkat(2): unlinkat(AT_REMOVEDIR) fails with ENOTEMPTY like rmdir() for non-empty directories. POSIX permits both ENOTEMPTY and EEXIST, but we use the clearer ENOTEMPTY, following BSD tradition. --- lib/libc/sys/unlink.2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/unlink.2 b/lib/libc/sys/unlink.2 index 41a2288..ac18971 100644 --- a/lib/libc/sys/unlink.2 +++ b/lib/libc/sys/unlink.2 @@ -28,7 +28,7 @@ .\" @(#)unlink.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd April 10, 2008 +.Dd April 25, 2010 .Dt UNLINK 2 .Os .Sh NAME @@ -165,7 +165,7 @@ argument does not specify an absolute path and the argument is neither .Dv AT_FDCWD nor a valid file descriptor open for searching. -.It Bq Er EEXIST +.It Bq Er ENOTEMPTY The .Fa flag parameter has the -- cgit v1.1 From f127752dd2cf20c1672e4b541d92b10ca9a73fb8 Mon Sep 17 00:00:00 2001 From: jilles Date: Wed, 5 May 2010 22:17:17 +0000 Subject: MFC r206760: getcwd(3): Clarify that EACCES may or may not be checked. POSIX permits but does not require checking access on the current and parent directories. Because various programs do not like it if getcwd(3) fails, it seems best to avoid checking access as much as possible. There are various reports in GNATS about this (search for getcwd). Our getcwd(3) implementation first queries the kernel for the pathname directly, which does not check any permissions but sometimes fails, and then falls back to reading all parent directories for the names. 
PR: standards/44425 --- lib/libc/gen/getcwd.3 | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/getcwd.3 b/lib/libc/gen/getcwd.3 index 12a2bd6..88291c3 100644 --- a/lib/libc/gen/getcwd.3 +++ b/lib/libc/gen/getcwd.3 @@ -28,7 +28,7 @@ .\" @(#)getcwd.3 8.2 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd November 24, 1997 +.Dd April 17, 2010 .Dt GETCWD 3 .Os .Sh NAME @@ -108,8 +108,6 @@ The function will fail if: .Bl -tag -width Er -.It Bq Er EACCES -Read or search permission was denied for a component of the pathname. .It Bq Er EINVAL The .Fa size @@ -124,6 +122,16 @@ The argument is greater than zero but smaller than the length of the pathname plus 1. .El +.Pp +The +.Fn getcwd +function +may fail if: +.Bl -tag -width Er +.It Bq Er EACCES +Read or search permission was denied for a component of the pathname. +This is only checked in limited cases, depending on implementation details. +.El .Sh SEE ALSO .Xr chdir 2 , .Xr fchdir 2 , -- cgit v1.1 From 57be3b3deba6ed645b3d61ad5899c5db876d271f Mon Sep 17 00:00:00 2001 From: kib Date: Tue, 11 May 2010 13:39:37 +0000 Subject: MFC r207604: Document RUSAGE_THREAD. --- lib/libc/sys/getrusage.2 | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/getrusage.2 b/lib/libc/sys/getrusage.2 index bdf5d45..423503f 100644 --- a/lib/libc/sys/getrusage.2 +++ b/lib/libc/sys/getrusage.2 @@ -28,7 +28,7 @@ .\" @(#)getrusage.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd June 4, 1993 +.Dd May 1, 2010 .Dt GETRUSAGE 2 .Os .Sh NAME @@ -42,6 +42,7 @@ .In sys/resource.h .Fd "#define RUSAGE_SELF 0" .Fd "#define RUSAGE_CHILDREN -1" +.Fd "#define RUSAGE_THREAD 1" .Ft int .Fn getrusage "int who" "struct rusage *rusage" .Sh DESCRIPTION @@ -49,11 +50,12 @@ The .Fn getrusage system call returns information describing the resources utilized by the current -process, or all its terminated child processes. +thread, the current process, or all its terminated child processes. The .Fa who argument is either -.Dv RUSAGE_SELF +.Dv RUSAGE_THREAD , +.Dv RUSAGE_SELF , or .Dv RUSAGE_CHILDREN . The buffer to which @@ -175,6 +177,10 @@ The .Fn getrusage system call appeared in .Bx 4.2 . +The +.Dv RUSAGE_THREAD +facility first appeared in +.Fx 8.1 . .Sh BUGS There is no way to obtain information about a child process that has not yet terminated. 
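As a quick illustration of the facility documented above (a minimal sketch, not part of the committed diff; it assumes a libc where RUSAGE_THREAD is defined, i.e. FreeBSD 8.1 or later):

	#include <sys/resource.h>
	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct rusage ru;

		/* Query resource usage for the calling thread only. */
		if (getrusage(RUSAGE_THREAD, &ru) == -1)
			err(1, "getrusage");
		printf("user %ld.%06lds sys %ld.%06lds\n",
		    (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
		    (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
		return (0);
	}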
-- cgit v1.1 From a324d90d0a18d25c754b7f0c8621be0aca0ace6f Mon Sep 17 00:00:00 2001 From: jilles Date: Thu, 13 May 2010 15:44:49 +0000 Subject: MFC r207734: sigaltstack(2): document some modernizations: * un-document 'struct sigaltstack' tag for stack_t as this is BSD-specific; this doesn't seem useful enough to document as such * alternate stacks are per thread, not per process * update error codes to what the kernel does and POSIX requires --- lib/libc/sys/sigaltstack.2 | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/sigaltstack.2 b/lib/libc/sys/sigaltstack.2 index da6877e..6119c6a 100644 --- a/lib/libc/sys/sigaltstack.2 +++ b/lib/libc/sys/sigaltstack.2 @@ -28,7 +28,7 @@ .\" @(#)sigaltstack.2 8.2 (Berkeley) 5/1/95 .\" $FreeBSD$ .\" -.Dd May 1, 1995 +.Dd May 6, 2010 .Dt SIGALTSTACK 2 .Os .Sh NAME @@ -39,7 +39,7 @@ .Sh SYNOPSIS .In signal.h .Bd -literal -typedef struct sigaltstack { +typedef struct { char *ss_sp; size_t ss_size; int ss_flags; @@ -51,25 +51,25 @@ typedef struct sigaltstack { The .Fn sigaltstack system call -allows users to define an alternate stack on which signals -are to be processed. +allows defining an alternate stack on which signals +are to be processed for the current thread. If .Fa ss is non-zero, it specifies a pointer to and the size of a .Em "signal stack" -on which to deliver signals, -and tells the system if the process is currently executing -on that stack. +on which to deliver signals. When a signal's action indicates its handler should execute on the signal stack (specified with a .Xr sigaction 2 system call), the system checks to see -if the process is currently executing on that stack. -If the process is not currently executing on the signal stack, +if the thread is currently executing on that stack. +If the thread is not currently executing on the signal stack, the system arranges a switch to the signal stack for the duration of the signal handler's execution. .Pp +An active stack cannot be modified. +.Pp If .Dv SS_DISABLE is set in @@ -78,12 +78,6 @@ is set in and .Fa ss_size are ignored and the signal stack will be disabled. -Trying to disable an active stack will cause -.Fn sigaltstack -to return -1 with -.Va errno -set to -.Er EINVAL . A disabled stack will cause all signals to be taken on the regular user stack. If the stack is later re-enabled then all signals that were specified @@ -96,7 +90,7 @@ The .Fa ss_flags field will contain the value .Dv SS_ONSTACK -if the process is currently on a signal stack and +if the thread is currently on a signal stack and .Dv SS_DISABLE if the signal stack is currently disabled. .Sh NOTES @@ -146,8 +140,12 @@ or .Fa oss points to memory that is not a valid part of the process address space. +.It Bq Er EPERM +An attempt was made to modify an active stack. .It Bq Er EINVAL -An attempt was made to disable an active stack. +The +.Fa ss_flags +field was invalid. .It Bq Er ENOMEM Size of alternate stack area is less than or equal to .Dv MINSIGSTKSZ . -- cgit v1.1 From 05725ac18b7ac599ca6a906125209db0e3c779c6 Mon Sep 17 00:00:00 2001 From: delphij Date: Thu, 13 May 2010 23:28:20 +0000 Subject: MFC r205099-205100,205108: Two optimizations to MI strlen(3) inspired by David S. Miller's blog posting [1]. - Use word-sized test for unaligned pointer before working the hard way. Memory page boundary is always integral multiple of a word alignment boundary. 
Therefore, if we can access memory referenced by pointer p, then (p & ~word mask) must also be accessible. - Better utilization of multi-issue processors' capacity for concurrency. The previous implementation used a formula that must be evaluated sequentially. However, the ~, & and - operations can actually be calculated at the same time when the operands are different and unrelated. The original Hacker's Delight formula also offered consistent performance regardless of whether the input would contain characters with their highest bit set, as it catches real nul characters only. These two optimizations have shown further improvements over the previous implementation on microbenchmarks on i386 and amd64 CPUs including Pentium 4, Core Duo 2 and i7. [1] http://vger.kernel.org/~davem/cgi-bin/blog.cgi/2010/03/08#strlen_1 --- lib/libc/string/strlen.c | 70 +++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/libc/string/strlen.c b/lib/libc/string/strlen.c index 860a988..2bc1f2b 100644 --- a/lib/libc/string/strlen.c +++ b/lib/libc/string/strlen.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009 Xin LI + * Copyright (c) 2009, 2010 Xin LI * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,15 +43,17 @@ __FBSDID("$FreeBSD$"); * ((x - 0x01....01) & ~x & 0x80....80) * * would evaluate to a non-zero value iff any of the bytes in the - * original word is zero. However, we can further reduce ~1/3 of - * time if we consider that strlen() usually operate on 7-bit ASCII - * by employing the following expression, which allows false positive - * when high bit of 1 and use the tail case to catch these case: + * original word is zero. * - * ((x - 0x01....01) & 0x80....80) + * On multi-issue processors, we can divide the above expression into: + * a) (x - 0x01....01) + * b) (~x & 0x80....80) + * c) a & b * - * This is more than 5.2 times as fast as the raw implementation on - * Intel T7300 under long mode for strings longer than word length. + * Where, a) and b) can be partially computed in parallel. + * + * The algorithm above is found on "Hacker's Delight" by + * Henry S. Warren, Jr. */ /* Magic numbers for the algorithm */ @@ -82,29 +84,47 @@ strlen(const char *str) { const char *p; const unsigned long *lp; + long va, vb; - /* Skip the first few bytes until we have an aligned p */ - for (p = str; (uintptr_t)p & LONGPTR_MASK; p++) - if (*p == '\0') - return (p - str); + /* + * Before trying the hard (unaligned byte-by-byte access) way + * to figure out whether there is a nul character, try to see + * if there is a nul character is within this accessible word + * first. + * + * p and (p & ~LONGPTR_MASK) must be equally accessible since + * they always fall in the same memory page, as long as page + * boundaries is integral multiple of word size.
+ */ + lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK); + va = (*lp - mask01); + vb = ((~*lp) & mask80); + lp++; + if (va & vb) + /* Check if we have \0 in the first part */ + for (p = str; p < (const char *)lp; p++) + if (*p == '\0') + return (p - str); /* Scan the rest of the string using word sized operation */ - for (lp = (const unsigned long *)p; ; lp++) - if ((*lp - mask01) & mask80) { - p = (const char *)(lp); - testbyte(0); - testbyte(1); - testbyte(2); - testbyte(3); + for (; ; lp++) { + va = (*lp - mask01); + vb = ((~*lp) & mask80); + if (va & vb) { + p = (const char *)(lp); + testbyte(0); + testbyte(1); + testbyte(2); + testbyte(3); #if (LONG_BIT >= 64) - testbyte(4); - testbyte(5); - testbyte(6); - testbyte(7); + testbyte(4); + testbyte(5); + testbyte(6); + testbyte(7); #endif - } + } + } /* NOTREACHED */ return (0); } - -- cgit v1.1 From e058c73bc5fcf603d4ea8f776c416ac1b7b0bf13 Mon Sep 17 00:00:00 2001 From: mm Date: Mon, 17 May 2010 14:27:40 +0000 Subject: MFC r207553, r207555, r207651: Implement the no_user_check option to pam_krb5. This option is available in the Linux implementation of pam_krb5 and allows authorizing a user not known to the local system. Ccache is not used as we don't have a secure uid/gid for the cache file. Usable for authentication of external Kerberos users (e.g. Active Directory) via PAM from applications like Cyrus saslauthd, PHP or perl. PR: bin/146186 Approved by: delphij (mentor) --- lib/libpam/modules/pam_krb5/pam_krb5.8 | 6 +++- lib/libpam/modules/pam_krb5/pam_krb5.c | 53 +++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/libpam/modules/pam_krb5/pam_krb5.8 b/lib/libpam/modules/pam_krb5/pam_krb5.8 index 3e0db91..bd7ac5b 100644 --- a/lib/libpam/modules/pam_krb5/pam_krb5.8 +++ b/lib/libpam/modules/pam_krb5/pam_krb5.8 @@ -1,7 +1,7 @@ .\" .\" $Id: pam_krb5.5,v 1.5 2000/01/05 00:59:56 fcusack Exp $ .\" $FreeBSD$ -.Dd January 15, 1999 +.Dd May 3, 2010 .Dt PAM_KRB5 8 .Os .Sh NAME @@ -108,6 +108,10 @@ and .Ql %p , to designate the current process ID; can be used in .Ar name . +.It Cm no_user_check +Do not verify if a user exists on the local system. This option implies the +.Cm no_ccache +option because there is no secure local uid/gid for the cache file.
.El .Ss Kerberos 5 Account Management Module The Kerberos 5 account management component diff --git a/lib/libpam/modules/pam_krb5/pam_krb5.c b/lib/libpam/modules/pam_krb5/pam_krb5.c index b56e0a3..439fcf9 100644 --- a/lib/libpam/modules/pam_krb5/pam_krb5.c +++ b/lib/libpam/modules/pam_krb5/pam_krb5.c @@ -89,6 +89,7 @@ static void compat_free_data_contents(krb5_context, krb5_data *); #define PAM_OPT_DEBUG "debug" #define PAM_OPT_FORWARDABLE "forwardable" #define PAM_OPT_NO_CCACHE "no_ccache" +#define PAM_OPT_NO_USER_CHECK "no_user_check" #define PAM_OPT_REUSE_CCACHE "reuse_ccache" /* @@ -194,34 +195,39 @@ pam_sm_authenticate(pam_handle_t *pamh, int flags __unused, PAM_LOG("Got password"); - /* Verify the local user exists (AFTER getting the password) */ - if (strchr(user, '@')) { - /* get a local account name for this principal */ - krbret = krb5_aname_to_localname(pam_context, princ, - sizeof(luser), luser); - if (krbret != 0) { - PAM_VERBOSE_ERROR("Kerberos 5 error"); - PAM_LOG("Error krb5_aname_to_localname(): %s", - krb5_get_err_text(pam_context, krbret)); - retval = PAM_USER_UNKNOWN; - goto cleanup2; + if (openpam_get_option(pamh, PAM_OPT_NO_USER_CHECK)) + PAM_LOG("Skipping local user check"); + else { + + /* Verify the local user exists (AFTER getting the password) */ + if (strchr(user, '@')) { + /* get a local account name for this principal */ + krbret = krb5_aname_to_localname(pam_context, princ, + sizeof(luser), luser); + if (krbret != 0) { + PAM_VERBOSE_ERROR("Kerberos 5 error"); + PAM_LOG("Error krb5_aname_to_localname(): %s", + krb5_get_err_text(pam_context, krbret)); + retval = PAM_USER_UNKNOWN; + goto cleanup2; + } + + retval = pam_set_item(pamh, PAM_USER, luser); + if (retval != PAM_SUCCESS) + goto cleanup2; + + PAM_LOG("PAM_USER Redone"); } - retval = pam_set_item(pamh, PAM_USER, luser); - if (retval != PAM_SUCCESS) + pwd = getpwnam(user); + if (pwd == NULL) { + retval = PAM_USER_UNKNOWN; goto cleanup2; + } - PAM_LOG("PAM_USER Redone"); - } - - pwd = getpwnam(user); - if (pwd == NULL) { - retval = PAM_USER_UNKNOWN; - goto cleanup2; + PAM_LOG("Done getpwnam()"); } - PAM_LOG("Done getpwnam()"); - /* Get a TGT */ memset(&creds, 0, sizeof(krb5_creds)); krbret = krb5_get_init_creds_password(pam_context, &creds, princ, @@ -366,7 +372,8 @@ pam_sm_setcred(pam_handle_t *pamh, int flags, return (PAM_SERVICE_ERR); /* If a persistent cache isn't desired, stop now. */ - if (openpam_get_option(pamh, PAM_OPT_NO_CCACHE)) + if (openpam_get_option(pamh, PAM_OPT_NO_CCACHE) || + openpam_get_option(pamh, PAM_OPT_NO_USER_CHECK)) return (PAM_SUCCESS); PAM_LOG("Establishing credentials"); -- cgit v1.1 From 40310299b8ffb63ea75c19d7352572e3f5bea061 Mon Sep 17 00:00:00 2001 From: thompsa Date: Mon, 17 May 2010 23:55:23 +0000 Subject: MFC r208020 Fix header file compliancy with libusb 1.0 from sourceforge. 
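For reference, a minimal libusb 1.0 client of the kind these headers are meant to compile unmodified (an illustrative sketch only, assuming the standard libusb 1.0 entry points; not part of the change):

	#include <sys/types.h>
	#include <libusb.h>

	int
	main(void)
	{
		libusb_context *ctx;
		libusb_device **devs;
		libusb_device_handle *hdl;	/* opaque handle type, as upstream */
		ssize_t n;

		if (libusb_init(&ctx) != 0)
			return (1);
		n = libusb_get_device_list(ctx, &devs);
		if (n >= 0) {
			/* Open the first device; the handle type now matches. */
			if (n > 0 && libusb_open(devs[0], &hdl) == 0)
				libusb_close(hdl);
			libusb_free_device_list(devs, 1);
		}
		libusb_exit(ctx);
		return (0);
	}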
--- lib/libusb/libusb.h | 4 ++-- lib/libusb/libusb10.c | 2 ++ lib/libusb/libusb10_desc.c | 2 ++ lib/libusb/libusb10_io.c | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libusb/libusb.h b/lib/libusb/libusb.h index bdf2812..fb18c6e 100644 --- a/lib/libusb/libusb.h +++ b/lib/libusb/libusb.h @@ -185,7 +185,7 @@ enum libusb_debug_level { struct libusb_context; struct libusb_device; struct libusb_transfer; -struct libusb20_device; +struct libusb_device_handle; struct libusb_pollfd { int fd; @@ -194,7 +194,7 @@ struct libusb_pollfd { typedef struct libusb_context libusb_context; typedef struct libusb_device libusb_device; -typedef struct libusb20_device libusb_device_handle; +typedef struct libusb_device_handle libusb_device_handle; typedef struct libusb_pollfd libusb_pollfd; typedef void (*libusb_pollfd_added_cb) (int fd, short events, void *user_data); typedef void (*libusb_pollfd_removed_cb) (int fd, void *user_data); diff --git a/lib/libusb/libusb10.c b/lib/libusb/libusb10.c index 25520d2..598e78c 100644 --- a/lib/libusb/libusb10.c +++ b/lib/libusb/libusb10.c @@ -37,6 +37,8 @@ #include #include +#define libusb_device_handle libusb20_device + #include "libusb20.h" #include "libusb20_desc.h" #include "libusb20_int.h" diff --git a/lib/libusb/libusb10_desc.c b/lib/libusb/libusb10_desc.c index 0215bce..b5b06bc 100644 --- a/lib/libusb/libusb10_desc.c +++ b/lib/libusb/libusb10_desc.c @@ -29,6 +29,8 @@ #include #include +#define libusb_device_handle libusb20_device + #include "libusb20.h" #include "libusb20_desc.h" #include "libusb20_int.h" diff --git a/lib/libusb/libusb10_io.c b/lib/libusb/libusb10_io.c index 3d6daef..ce79723 100644 --- a/lib/libusb/libusb10_io.c +++ b/lib/libusb/libusb10_io.c @@ -34,6 +34,8 @@ #include #include +#define libusb_device_handle libusb20_device + #include "libusb20.h" #include "libusb20_desc.h" #include "libusb20_int.h" -- cgit v1.1 From 4c0081bfc8f7b68e52b1a5d8c3e5d5427ad3ef59 Mon Sep 17 00:00:00 2001 From: thompsa Date: Mon, 17 May 2010 23:56:17 +0000 Subject: MFC r208021 Fix return values for usb_find_busses() and usb_find_devices(). We should try to return the actual number of busses and devices. --- lib/libusb/libusb20_compat01.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libusb/libusb20_compat01.c b/lib/libusb/libusb20_compat01.c index 48a7dd5..e53770c 100644 --- a/lib/libusb/libusb20_compat01.c +++ b/lib/libusb/libusb20_compat01.c @@ -820,7 +820,7 @@ int usb_find_busses(void) { usb_busses = &usb_global_bus; - return (0); + return (1); } int @@ -904,7 +904,7 @@ usb_find_devices(void) LIST_ADD(usb_global_bus.devices, udev); } - return (0); /* success */ + return (devnum - 1); /* success */ } struct usb_device * -- cgit v1.1 From fe4b80709b0c6a420232cfe54bcb668771f04347 Mon Sep 17 00:00:00 2001 From: thompsa Date: Mon, 17 May 2010 23:57:34 +0000 Subject: MFC r208012 Support getting signed and unsigned HID data. 
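The hunk below folds masking and sign extension into a single pair of shifts; as a standalone illustration of the idiom (a hypothetical helper, not part of the patch), for a field of width bits already shifted down to bit 0:

	#include <stdint.h>

	/*
	 * Assumes 1 <= width <= 32; like the committed code, the signed
	 * branch relies on arithmetic right shift of negative values.
	 */
	static int32_t
	extract_field(uint32_t raw, unsigned int width, int is_signed)
	{
		unsigned int shift = 32 - width;

		/* Shift the field to the top of the word, then back down. */
		if (is_signed)
			return ((int32_t)(raw << shift) >> shift);
		return ((uint32_t)(raw << shift) >> shift);
	}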
--- lib/libusbhid/data.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/libusbhid/data.c b/lib/libusbhid/data.c index d278ddb..087989c 100644 --- a/lib/libusbhid/data.c +++ b/lib/libusbhid/data.c @@ -53,13 +53,17 @@ hid_get_data(const void *p, const hid_item_t *h) data = 0; for (i = 0; i <= end; i++) data |= buf[offs + i] << (i*8); + + /* Correctly shift down data */ data >>= hpos % 8; - data &= (1 << hsize) - 1; - if (h->logical_minimum < 0) { - /* Need to sign extend */ - hsize = sizeof data * 8 - hsize; - data = (data << hsize) >> hsize; - } + hsize = 32 - hsize; + + /* Mask and sign extend in one */ + if ((h->logical_minimum < 0) || (h->logical_maximum < 0)) + data = (int32_t)((int32_t)data << hsize) >> hsize; + else + data = (uint32_t)((uint32_t)data << hsize) >> hsize; + return (data); } -- cgit v1.1 From 90fb8f94bd2930565982e30ae0b9f6d1ee1bda6a Mon Sep 17 00:00:00 2001 From: thompsa Date: Mon, 17 May 2010 23:59:14 +0000 Subject: MFC r203773 Within the libusb 0.1 API, the bus number is always faked to 0. Device numbers, however, are positive and seem to be reverse sorted in the list. Make device numbering conform and produce results consistent with the libusb 0.1 API. It is now possible to distinguish a device based on its (bus, dev) numbers. --- lib/libusb/libusb20_compat01.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/libusb/libusb20_compat01.c b/lib/libusb/libusb20_compat01.c index e53770c..43e3344 100644 --- a/lib/libusb/libusb20_compat01.c +++ b/lib/libusb/libusb20_compat01.c @@ -829,6 +829,7 @@ usb_find_devices(void) struct libusb20_device *pdev; struct usb_device *udev; struct LIBUSB20_DEVICE_DESC_DECODED *ddesc; + int devnum; int err; /* cleanup after last device search */ @@ -855,6 +856,7 @@ usb_find_devices(void) } /* iterate all devices */ + devnum = 1; pdev = NULL; while ((pdev = libusb20_be_device_foreach(usb_backend, pdev))) { udev = malloc(sizeof(*udev)); @@ -891,6 +893,7 @@ usb_find_devices(void) /* truncate number of configurations */ udev->descriptor.bNumConfigurations = USB_MAXCONFIG; } + udev->devnum = devnum++; /* link together the two structures */ udev->dev = pdev; pdev->privLuData = udev; -- cgit v1.1 From aeccfd7aa1f603dae9baccbaf18fe0dbcb27ac5e Mon Sep 17 00:00:00 2001 From: mm Date: Tue, 18 May 2010 00:46:15 +0000 Subject: MFC r204738, r205113 (imp): MFC r204738: Remove stale references to libkrb5. Rejigger the SUBDIR setting a smidge: we now set all the libraries that depend on something else, and then SUBDIR+= the rest. MFC r205113: Make this conform to the other top-level Makefile subdir listings with one file per line. Approved by: delphij (mentor) --- lib/Makefile | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 00b1c60..694b606 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,8 +9,8 @@ # csu must be built before all shared libaries for ELF. # libc must be built before all other shared libraries. # libbsm must be built before ibauditd. -# libcom_err must be built before libkrb5 and libpam. -# libcrypt must be built before libkrb5 and libpam. +# libcom_err must be built before libpam. +# libcrypt must be built before libpam. # libkvm must be built before libdevstat. # msun must be built before libg++ and libstdc++. # libmd must be built before libatm, libopie, libradius, and libtacplus.
@@ -26,21 +26,84 @@ # libgssapi must be built before librpcsec_gss # # Otherwise, the SUBDIR list should be in alphabetical order. - -SUBDIR= ${_csu} libc libbsm libauditd libcom_err libcrypt libelf libkvm msun \ +# +# Except it appears bind needs to be compiled last + +SUBDIR_ORDERED= ${_csu} \ + libc \ + libbsm \ + libauditd \ + libcom_err \ + libcrypt \ + libelf \ + libkvm \ + msun \ libmd \ - ncurses ${_libnetgraph} libradius librpcsvc libsbuf \ - libtacplus libutil ${_libypclnt} libalias libarchive ${_libatm} \ - libbegemot ${_libbluetooth} ${_libbsnmp} libbz2 \ - libcalendar libcam libcompat libdevinfo libdevstat libdisk \ - libdwarf libedit libexpat libfetch libftpio libgeom ${_libgpib} \ - ${_libgssapi} ${_librpcsec_gss} libipsec \ - ${_libipx} libjail libkiconv libmagic libmemstat ${_libmilter} \ - ${_libmp} ${_libncp} ${_libngatm} libopie libpam libpcap \ - ${_libpmc} libproc librt ${_libsdp} ${_libsm} ${_libsmb} \ + ncurses \ + ${_libnetgraph} \ + libradius \ + librpcsvc \ + libsbuf \ + libtacplus \ + libutil \ + ${_libypclnt} + +SUBDIR= ${SUBDIR_ORDERED} \ + libalias \ + libarchive \ + ${_libatm} \ + libbegemot \ + ${_libbluetooth} \ + ${_libbsnmp} \ + libbz2 \ + libcalendar \ + libcam \ + libcompat \ + libdevinfo \ + libdevstat \ + libdisk \ + libdwarf \ + libedit \ + libexpat \ + libfetch \ + libftpio \ + libgeom \ + ${_libgpib} \ + ${_libgssapi} \ + ${_librpcsec_gss} \ + libipsec \ + ${_libipx} \ + libjail \ + libkiconv \ + libmagic \ + libmemstat \ + ${_libmilter} \ + ${_libmp} \ + ${_libncp} \ + ${_libngatm} \ + libopie \ + libpam \ + libpcap \ + ${_libpmc} \ + libproc \ + librt \ + ${_libsdp} \ + ${_libsm} \ + ${_libsmb} \ ${_libsmdb} \ - ${_libsmutil} libstand ${_libtelnet} ${_libthr} libthread_db libufs \ - libugidfw ${_libusbhid} ${_libusb} ${_libvgl} libwrap liby libz \ + ${_libsmutil} \ + libstand \ + ${_libtelnet} \ + ${_libthr} \ + libthread_db \ + libufs \ + libugidfw \ + ${_libusbhid} \ + ${_libusb} \ + ${_libvgl} \ + libwrap \ + liby \ + libz \ ${_bind} .if exists(${.CURDIR}/csu/${MACHINE_ARCH}-elf) -- cgit v1.1 From 684fea40b494a86f147beb6d274924a54e0106a7 Mon Sep 17 00:00:00 2001 From: thompsa Date: Tue, 18 May 2010 03:16:02 +0000 Subject: MFC r208023 Use fixed width integer types for parsing the binary hid data. 
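With the prototypes below switched to fixed-width types, callers should hold extracted values in an int32_t; a short usage sketch (illustrative only; it assumes rd came from hid_get_report_desc(3), buf holds a matching report, and usage was resolved beforehand, e.g. via hid_parse_usage_in_page(3)):

	#include <stdint.h>
	#include <usbhid.h>

	/* Read an input field, increment it, and write it back into buf. */
	static void
	bump_field(report_desc_t rd, void *buf, unsigned int usage)
	{
		hid_item_t h;
		int32_t v;

		if (hid_locate(rd, usage, hid_input, &h, 0)) {
			v = hid_get_data(buf, &h);
			hid_set_data(buf, &h, v + 1);
		}
	}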
--- lib/libusbhid/data.c | 9 ++++++--- lib/libusbhid/usbhid.h | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/libusbhid/data.c b/lib/libusbhid/data.c index 087989c..eb42422 100644 --- a/lib/libusbhid/data.c +++ b/lib/libusbhid/data.c @@ -33,7 +33,7 @@ __FBSDID("$FreeBSD$"); #include #include "usbhid.h" -int +int32_t hid_get_data(const void *p, const hid_item_t *h) { const unsigned char *buf; @@ -68,12 +68,15 @@ hid_get_data(const void *p, const hid_item_t *h) } void -hid_set_data(void *p, const hid_item_t *h, int data) +hid_set_data(void *p, const hid_item_t *h, int32_t data) { unsigned char *buf; unsigned int hpos; unsigned int hsize; - int i, end, offs, mask; + uint32_t mask; + int i; + int end; + int offs; buf = p; hpos = h->pos; /* bit position of data */ diff --git a/lib/libusbhid/usbhid.h b/lib/libusbhid/usbhid.h index b8751cd..90f0f03 100644 --- a/lib/libusbhid/usbhid.h +++ b/lib/libusbhid/usbhid.h @@ -30,6 +30,7 @@ */ #include +#include typedef struct report_desc *report_desc_t; @@ -105,7 +106,7 @@ int hid_parse_usage_in_page(const char *name); int hid_parse_usage_page(const char *name); /* Extracting/insertion of data, data.c: */ -int hid_get_data(const void *p, const hid_item_t *h); -void hid_set_data(void *p, const hid_item_t *h, int data); +int32_t hid_get_data(const void *p, const hid_item_t *h); +void hid_set_data(void *p, const hid_item_t *h, int32_t data); __END_DECLS -- cgit v1.1 From be8f6d151def6056cf827b4e13ffb4ea51328c7d Mon Sep 17 00:00:00 2001 From: mm Date: Tue, 18 May 2010 09:59:09 +0000 Subject: MFC r207842, r207844, r208099: MFC r207842: Import of liblzma, xz, xzdec, lzmainfo from vendor branch Add support for xz and lzma to lesspipe.sh (xzless, lzless) MFC r207844: Add two public headers missing in r207842 Adjust CFLAGS for lzmainfo, xz, xzdec MFC r208099: Add versioned symbols to liblzma Use default SHLIB_MAJOR. 
Approved by: delphij (mentor) --- lib/Makefile | 1 + lib/liblzma/Makefile | 141 +++++++++++++++++++++++++++++++++++ lib/liblzma/Symbol.map | 189 +++++++++++++++++++++++++++++++++++++++++++++++ lib/liblzma/Versions.def | 9 +++ lib/liblzma/config.h | 95 ++++++++++++++++++++++++ 5 files changed, 435 insertions(+) create mode 100644 lib/liblzma/Makefile create mode 100644 lib/liblzma/Symbol.map create mode 100644 lib/liblzma/Versions.def create mode 100644 lib/liblzma/config.h (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 694b606..e9f4ec3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -75,6 +75,7 @@ SUBDIR= ${SUBDIR_ORDERED} \ ${_libipx} \ libjail \ libkiconv \ + liblzma \ libmagic \ libmemstat \ ${_libmilter} \ diff --git a/lib/liblzma/Makefile b/lib/liblzma/Makefile new file mode 100644 index 0000000..f84ac93 --- /dev/null +++ b/lib/liblzma/Makefile @@ -0,0 +1,141 @@ +# $FreeBSD$ + +LIB= lzma +LZMADIR= ${.CURDIR}/../../contrib/xz/src/liblzma + +.PATH: ${LZMADIR}/../common +SRCS+= tuklib_physmem.c + +.PATH: ${LZMADIR}/api/lzma + +MAININCS= ../lzma.h +MAININCSDIR= ${INCLUDEDIR} + +LZMAINCS+= base.h \ + bcj.h \ + block.h \ + check.h \ + container.h \ + delta.h \ + filter.h \ + hardware.h \ + index.h \ + index_hash.h \ + lzma.h \ + stream_flags.h \ + subblock.h \ + version.h \ + vli.h + +LZMAINCSDIR= ${INCLUDEDIR}/lzma + +INCSGROUPS= MAININCS LZMAINCS + +.PATH: ${LZMADIR}/common +SRCS+= common.c \ + block_util.c \ + easy_preset.c \ + filter_common.c \ + hardware_physmem.c \ + index.c \ + stream_flags_common.c \ + vli_size.c \ + alone_encoder.c \ + block_buffer_encoder.c \ + block_encoder.c \ + block_header_encoder.c \ + easy_buffer_encoder.c \ + easy_encoder.c \ + easy_encoder_memusage.c \ + filter_buffer_encoder.c \ + filter_encoder.c \ + filter_flags_encoder.c \ + index_encoder.c \ + stream_buffer_encoder.c \ + stream_encoder.c \ + stream_flags_encoder.c \ + vli_encoder.c \ + alone_decoder.c \ + auto_decoder.c \ + block_buffer_decoder.c \ + block_decoder.c \ + block_header_decoder.c \ + easy_decoder_memusage.c \ + filter_buffer_decoder.c \ + filter_decoder.c \ + filter_flags_decoder.c \ + index_decoder.c \ + index_hash.c \ + stream_buffer_decoder.c \ + stream_decoder.c \ + stream_flags_decoder.c \ + vli_decoder.c + +.PATH: ${LZMADIR}/check +SRCS+= check.c \ + crc32_table.c \ + crc64_table.c \ + sha256.c +.if defined(MACHINE_ARCH) && ${MACHINE_ARCH} == "i386" +SRCS+= crc32_x86.S \ + crc64_x86.S +.else +SRCS+= crc32_fast.c \ + crc64_fast.c +.endif + +.PATH: ${LZMADIR}/lz +SRCS+= lz_encoder.c \ + lz_encoder_mf.c \ + lz_decoder.c + +.PATH: ${LZMADIR}/lzma +SRCS+= lzma_encoder.c \ + lzma_encoder_presets.c \ + lzma_encoder_optimum_fast.c \ + lzma_encoder_optimum_normal.c \ + fastpos_table.c \ + lzma_decoder.c \ + lzma2_encoder.c \ + lzma2_decoder.c + +.PATH: ${LZMADIR}/rangecoder +SRCS+= price_table.c + +.PATH: ${LZMADIR}/delta +SRCS+= delta_common.c \ + delta_encoder.c \ + delta_decoder.c + +.PATH: ${LZMADIR}/simple +SRCS+= simple_coder.c \ + simple_encoder.c \ + simple_decoder.c \ + x86.c \ + powerpc.c \ + ia64.c \ + arm.c \ + armthumb.c \ + sparc.c + +WARNS?= 3 + +CFLAGS+= -DHAVE_CONFIG_H \ + -DTUKLIB_SYMBOL_PREFIX=lzma_ \ + -I${.CURDIR} \ + -I${LZMADIR}/api \ + -I${LZMADIR}/common \ + -I${LZMADIR}/check \ + -I${LZMADIR}/lz \ + -I${LZMADIR}/rangecoder \ + -I${LZMADIR}/lzma \ + -I${LZMADIR}/subblock \ + -I${LZMADIR}/delta \ + -I${LZMADIR}/simple \ + -I${LZMADIR}/../common + +VERSION_DEF= ${.CURDIR}/Versions.def +SYMBOL_MAPS= ${.CURDIR}/Symbol.map +CFLAGS+= -DSYMBOL_VERSIONING 
+ +.include diff --git a/lib/liblzma/Symbol.map b/lib/liblzma/Symbol.map new file mode 100644 index 0000000..efefffd --- /dev/null +++ b/lib/liblzma/Symbol.map @@ -0,0 +1,189 @@ +/* + * $FreeBSD$ + */ + +XZ_5.0 { + lzma_alone_decoder; + lzma_alone_encoder; + lzma_auto_decoder; + lzma_block_buffer_bound; + lzma_block_buffer_decode; + lzma_block_buffer_encode; + lzma_block_compressed_size; + lzma_block_decoder; + lzma_block_encoder; + lzma_block_header_decode; + lzma_block_header_encode; + lzma_block_header_size; + lzma_block_total_size; + lzma_block_unpadded_size; + lzma_check_is_supported; + lzma_check_size; + lzma_code; + lzma_crc32; + lzma_crc64; + lzma_easy_buffer_encode; + lzma_easy_decoder_memusage; + lzma_easy_encoder; + lzma_easy_encoder_memusage; + lzma_end; + lzma_filter_decoder_is_supported; + lzma_filter_encoder_is_supported; + lzma_filter_flags_decode; + lzma_filter_flags_encode; + lzma_filter_flags_size; + lzma_filters_copy; + lzma_filters_update; + lzma_get_check; + lzma_index_append; + lzma_index_block_count; + lzma_index_buffer_decode; + lzma_index_buffer_encode; + lzma_index_cat; + lzma_index_checks; + lzma_index_decoder; + lzma_index_dup; + lzma_index_encoder; + lzma_index_end; + lzma_index_file_size; + lzma_index_hash_append; + lzma_index_hash_decode; + lzma_index_hash_end; + lzma_index_hash_init; + lzma_index_hash_size; + lzma_index_init; + lzma_index_iter_init; + lzma_index_iter_locate; + lzma_index_iter_next; + lzma_index_iter_rewind; + lzma_index_memusage; + lzma_index_memused; + lzma_index_size; + lzma_index_stream_count; + lzma_index_stream_flags; + lzma_index_stream_padding; + lzma_index_stream_size; + lzma_index_total_size; + lzma_index_uncompressed_size; + lzma_lzma_preset; + lzma_memlimit_get; + lzma_memlimit_set; + lzma_memusage; + lzma_mf_is_supported; + lzma_mode_is_supported; + lzma_physmem; + lzma_properties_decode; + lzma_properties_encode; + lzma_properties_size; + lzma_raw_buffer_decode; + lzma_raw_buffer_encode; + lzma_raw_decoder; + lzma_raw_decoder_memusage; + lzma_raw_encoder; + lzma_raw_encoder_memusage; + lzma_stream_buffer_bound; + lzma_stream_buffer_decode; + lzma_stream_buffer_encode; + lzma_stream_decoder; + lzma_stream_encoder; + lzma_stream_flags_compare; + lzma_stream_footer_decode; + lzma_stream_footer_encode; + lzma_stream_header_decode; + lzma_stream_header_encode; + lzma_version_number; + lzma_version_string; + lzma_vli_decode; + lzma_vli_encode; + lzma_vli_size; +}; + +XZprivate_1.0 { + lzma_alloc; + lzma_alone_decoder_init; + lzma_block_decoder_init; + lzma_block_encoder_init; + lzma_bufcpy; + lzma_check_finish; + lzma_check_init; + lzma_check_update; + lzma_chunk_size; + lzma_delta_coder_init; + lzma_delta_coder_memusage; + lzma_delta_decoder_init; + lzma_delta_encoder_init; + lzma_delta_props_decode; + lzma_delta_props_encode; + lzma_easy_preset; + lzma_free; + lzma_index_encoder_init; + lzma_index_padding_size; + lzma_index_prealloc; + lzma_lz_decoder_init; + lzma_lz_decoder_memusage; + lzma_lz_decoder_uncompressed; + lzma_lz_encoder_init; + lzma_lz_encoder_memusage; + lzma_lzma2_decoder_init; + lzma_lzma2_decoder_memusage; + lzma_lzma2_encoder_init; + lzma_lzma2_encoder_memusage; + lzma_lzma2_props_decode; + lzma_lzma2_props_encode; + lzma_lzma_decoder_create; + lzma_lzma_decoder_init; + lzma_lzma_decoder_memusage; + lzma_lzma_decoder_memusage_nocheck; + lzma_lzma_encode; + lzma_lzma_encoder_create; + lzma_lzma_encoder_init; + lzma_lzma_encoder_memusage; + lzma_lzma_encoder_reset; + lzma_lzma_lclppb_decode; + 
lzma_lzma_lclppb_encode; + lzma_lzma_optimum_fast; + lzma_lzma_optimum_normal; + lzma_lzma_props_decode; + lzma_lzma_props_encode; + lzma_mf_bt2_find; + lzma_mf_bt2_skip; + lzma_mf_bt3_find; + lzma_mf_bt3_skip; + lzma_mf_bt4_find; + lzma_mf_bt4_skip; + lzma_mf_find; + lzma_mf_hc3_find; + lzma_mf_hc3_skip; + lzma_mf_hc4_find; + lzma_mf_hc4_skip; + lzma_next_end; + lzma_next_filter_init; + lzma_next_filter_update; + lzma_raw_coder_init; + lzma_raw_coder_memusage; + lzma_raw_decoder_init; + lzma_raw_encoder_init; + lzma_sha256_finish; + lzma_sha256_init; + lzma_sha256_update; + lzma_simple_arm_decoder_init; + lzma_simple_arm_encoder_init; + lzma_simple_armthumb_decoder_init; + lzma_simple_armthumb_encoder_init; + lzma_simple_coder_init; + lzma_simple_ia64_decoder_init; + lzma_simple_ia64_encoder_init; + lzma_simple_powerpc_decoder_init; + lzma_simple_powerpc_encoder_init; + lzma_simple_props_decode; + lzma_simple_props_encode; + lzma_simple_props_size; + lzma_simple_sparc_decoder_init; + lzma_simple_sparc_encoder_init; + lzma_simple_x86_decoder_init; + lzma_simple_x86_encoder_init; + lzma_stream_decoder_init; + lzma_stream_encoder_init; + lzma_strm_init; + lzma_tuklib_physmem; +}; diff --git a/lib/liblzma/Versions.def b/lib/liblzma/Versions.def new file mode 100644 index 0000000..1f65670 --- /dev/null +++ b/lib/liblzma/Versions.def @@ -0,0 +1,9 @@ +# $FreeBSD$ + +XZ_5.0 { +}; + + +XZprivate_1.0 { +} XZ_5.0; + diff --git a/lib/liblzma/config.h b/lib/liblzma/config.h new file mode 100644 index 0000000..223cb95 --- /dev/null +++ b/lib/liblzma/config.h @@ -0,0 +1,95 @@ +// $FreeBSD$ +#define ASSUME_RAM 128 +#define HAVE_CHECK_CRC32 1 +#define HAVE_CHECK_CRC64 1 +#define HAVE_CHECK_SHA256 1 +#define HAVE_DECL_PROGRAM_INVOCATION_NAME 0 +#define HAVE_DECODER 1 +#define HAVE_DECODER_ARM 1 +#define HAVE_DECODER_ARMTHUMB 1 +#define HAVE_DECODER_DELTA 1 +#define HAVE_DECODER_IA64 1 +#define HAVE_DECODER_LZMA1 1 +#define HAVE_DECODER_LZMA2 1 +#define HAVE_DECODER_POWERPC 1 +#define HAVE_DECODER_SPARC 1 +#define HAVE_DECODER_X86 1 +#define HAVE_DLFCN_H 1 +#define HAVE_ENCODER 1 +#define HAVE_ENCODER_ARM 1 +#define HAVE_ENCODER_ARMTHUMB 1 +#define HAVE_ENCODER_DELTA 1 +#define HAVE_ENCODER_IA64 1 +#define HAVE_ENCODER_LZMA1 1 +#define HAVE_ENCODER_LZMA2 1 +#define HAVE_ENCODER_POWERPC 1 +#define HAVE_ENCODER_SPARC 1 +#define HAVE_ENCODER_X86 1 +#define HAVE_FCNTL_H 1 +#define HAVE_FUTIMES 1 +#define HAVE_GETOPT_H 1 +#define HAVE_GETOPT_LONG 1 +#define HAVE_INTTYPES_H 1 +#define HAVE_LIMITS_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_MF_BT2 1 +#define HAVE_MF_BT3 1 +#define HAVE_MF_BT4 1 +#define HAVE_MF_HC3 1 +#define HAVE_MF_HC4 1 +#define HAVE_OPTRESET 1 +#define HAVE_PTHREAD 1 +#define HAVE_STDBOOL_H 1 +#define HAVE_STDINT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRINGS_H 1 +#define HAVE_STRING_H 1 +#define HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC 1 +#define HAVE_SYS_ENDIAN_H 1 +#define HAVE_SYS_PARAM_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TIME_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_UINTPTR_T 1 +#define HAVE_UNISTD_H 1 +#define HAVE_VISIBILITY 1 +#define HAVE__BOOL 1 +#define LT_OBJDIR ".libs/" +#define NDEBUG 1 +#define PACKAGE "xz" +#define PACKAGE_BUGREPORT "lasse.collin@tukaani.org" +#define PACKAGE_NAME "XZ Utils" +#define PACKAGE_STRING "XZ Utils 4.999.9beta" +#define PACKAGE_TARNAME "xz" +#define PACKAGE_URL "http://tukaani.org/xz/" +#define PACKAGE_VERSION "4.999.9beta" +#define SIZEOF_SIZE_T 8 +#define STDC_HEADERS 1 +#define TUKLIB_CPUCORES_SYSCONF 1 +#define 
TUKLIB_FAST_UNALIGNED_ACCESS 1 +#define TUKLIB_PHYSMEM_SYSCONF 1 +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +#define VERSION "4.999.9beta" +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +/* # undef WORDS_BIGENDIAN */ +# endif +#endif -- cgit v1.1 From c00023af29c999b7ae1d2d84a75d96141fb83b1f Mon Sep 17 00:00:00 2001 From: kaiw Date: Tue, 18 May 2010 10:32:20 +0000 Subject: MFC r205728 Merge improvements from kernel HID parser to the userland usbhid(3) parser. This merge does not change any API and should not break any native or third-party applications. Changes include: * Merge multiple report ID support and other improvements from kernel HID parser. * Ignore rid argument in hid_start_parser, parse all the report items since we now support multiple report IDs. * Skip report ID byte in hid_get_data() and set report ID byte in hid_set_data(), if report ID is non-zero. * Reimplement hid_get_report_id: instead of getting the report ID from the uhid device (which is always 0), try parsing the report descriptor and return the first report ID encountered. MFC r207812 hid_get_data() now expects that the hid data passed in always contains the report ID byte. Thus we should not skip the report ID byte in hid_interrupt(). Also, if HUP_KEYBOARD usage is an array, do not try to modify the 'data' pointer; instead, increase the hid_item_t field 'pos' by 'report_size' before calling hid_get_data() during each iteration. --- lib/libusbhid/data.c | 31 ++- lib/libusbhid/descr.c | 22 +- lib/libusbhid/parse.c | 536 ++++++++++++++++++++++++++++------------------- lib/libusbhid/usage.c | 1 + lib/libusbhid/usbhid.h | 63 +++--- lib/libusbhid/usbvar.h | 6 +++--- 6 files changed, 406 insertions(+), 251 deletions(-) (limited to 'lib') diff --git a/lib/libusbhid/data.c b/lib/libusbhid/data.c index eb42422..3b90ac6 100644 --- a/lib/libusbhid/data.c +++ b/lib/libusbhid/data.c @@ -29,6 +29,7 @@ #include __FBSDID("$FreeBSD$"); +#include #include #include #include "usbhid.h" @@ -36,18 +37,27 @@ __FBSDID("$FreeBSD$"); int32_t hid_get_data(const void *p, const hid_item_t *h) { - const uint8_t *buf; - uint32_t hpos; - uint32_t hsize; - uint32_t data; + const uint8_t *buf; + uint32_t hpos; + uint32_t hsize; + uint32_t data; int i, end, offs; buf = p; + + /* Skip report ID byte. */ + if (h->report_ID > 0) + buf++; + hpos = h->pos; /* bit position of data */ hsize = h->report_size; /* bit length of data */ + /* Range check and limit */ if (hsize == 0) return (0); + if (hsize > 32) + hsize = 32; + offs = hpos / 8; end = (hpos + hsize) / 8 - offs; data = 0; @@ -70,15 +80,20 @@ hid_get_data(const void *p, const hid_item_t *h) void hid_set_data(void *p, const hid_item_t *h, int32_t data) { - unsigned char *buf; - unsigned int hpos; - unsigned int hsize; + uint8_t *buf; + uint32_t hpos; + uint32_t hsize; uint32_t mask; int i; int end; int offs; buf = p; + + /* Set report ID byte.
*/ + if (h->report_ID > 0) + *buf++ = h->report_ID & 0xff; + hpos = h->pos; /* bit position of data */ hsize = h->report_size; /* bit length of data */ @@ -97,5 +112,5 @@ hid_set_data(void *p, const hid_item_t *h, int32_t data) for (i = 0; i <= end; i++) buf[offs + i] = (buf[offs + i] & (mask >> (i*8))) | - ((data >> (i*8)) & 0xff); + ((data >> (i*8)) & 0xff); } diff --git a/lib/libusbhid/descr.c b/lib/libusbhid/descr.c index 2ce1bf0..a5c033a 100644 --- a/lib/libusbhid/descr.c +++ b/lib/libusbhid/descr.c @@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$"); #include #include #include - #include #include "usbhid.h" @@ -59,9 +58,30 @@ hid_set_immed(int fd, int enable) int hid_get_report_id(int fd) { + report_desc_t rep; + hid_data_t d; + hid_item_t h; + int kindset; int temp = -1; int ret; + if ((rep = hid_get_report_desc(fd)) == NULL) + goto use_ioctl; + kindset = 1 << hid_input | 1 << hid_output | 1 << hid_feature; + for (d = hid_start_parse(rep, kindset, 0); hid_get_item(d, &h); ) { + /* Return the first report ID we met. */ + if (h.report_ID != 0) { + temp = h.report_ID; + break; + } + } + hid_end_parse(d); + hid_dispose_report_desc(rep); + + if (temp > 0) + return (temp); + +use_ioctl: ret = ioctl(fd, USB_GET_REPORT_ID, &temp); #ifdef HID_COMPAT7 if (ret < 0) diff --git a/lib/libusbhid/parse.c b/lib/libusbhid/parse.c index 3abc036..ce034b1 100644 --- a/lib/libusbhid/parse.c +++ b/lib/libusbhid/parse.c @@ -40,42 +40,43 @@ __FBSDID("$FreeBSD$"); #include "usbhid.h" #include "usbvar.h" -#define MAXUSAGE 100 -struct hid_data { - u_char *start; - u_char *end; - u_char *p; - hid_item_t cur; - unsigned int usages[MAXUSAGE]; - int nusage; - int minset; - int logminsize; - int multi; - int multimax; - int kindset; - int reportid; - - /* - * The start of collection item has no report ID set, so save - * it until we know the ID. - */ - hid_item_t savedcoll; - u_char hassavedcoll; - /* - * Absolute data position (bits) for input/output/feature. - * Assumes that hid_input, hid_output and hid_feature have - * values 0, 1 and 2. - */ - unsigned int kindpos[3]; -}; +#define MAXUSAGE 100 +#define MAXPUSH 4 +#define MAXID 64 -static int min(int x, int y) { return x < y ? 
x : y; } +struct hid_pos_data { + int32_t rid; + uint32_t pos; +}; -static int hid_get_item_raw(hid_data_t s, hid_item_t *h); +struct hid_data { + const uint8_t *start; + const uint8_t *end; + const uint8_t *p; + struct hid_item cur[MAXPUSH]; + struct hid_pos_data last_pos[MAXID]; + int32_t usages_min[MAXUSAGE]; + int32_t usages_max[MAXUSAGE]; + int32_t usage_last; /* last seen usage */ + uint32_t loc_size; /* last seen size */ + uint32_t loc_count; /* last seen count */ + uint8_t kindset; /* we have 5 kinds so 8 bits are enough */ + uint8_t pushlevel; /* current pushlevel */ + uint8_t ncount; /* end usage item count */ + uint8_t icount; /* current usage item count */ + uint8_t nusage; /* end "usages_min/max" index */ + uint8_t iusage; /* current "usages_min/max" index */ + uint8_t ousage; /* current "usages_min/max" offset */ + uint8_t susage; /* usage set flags */ +}; +/*------------------------------------------------------------------------* + * hid_clear_local + *------------------------------------------------------------------------*/ static void hid_clear_local(hid_item_t *c) { + c->usage = 0; c->usage_minimum = 0; c->usage_maximum = 0; @@ -88,8 +89,61 @@ hid_clear_local(hid_item_t *c) c->set_delimiter = 0; } +static void +hid_switch_rid(struct hid_data *s, struct hid_item *c, int32_t next_rID) +{ + uint8_t i; + + /* check for same report ID - optimise */ + + if (c->report_ID == next_rID) + return; + + /* save current position for current rID */ + + if (c->report_ID == 0) { + i = 0; + } else { + for (i = 1; i != MAXID; i++) { + if (s->last_pos[i].rid == c->report_ID) + break; + if (s->last_pos[i].rid == 0) + break; + } + } + if (i != MAXID) { + s->last_pos[i].rid = c->report_ID; + s->last_pos[i].pos = c->pos; + } + + /* store next report ID */ + + c->report_ID = next_rID; + + /* lookup last position for next rID */ + + if (next_rID == 0) { + i = 0; + } else { + for (i = 1; i != MAXID; i++) { + if (s->last_pos[i].rid == next_rID) + break; + if (s->last_pos[i].rid == 0) + break; + } + } + if (i != MAXID) { + s->last_pos[i].rid = next_rID; + c->pos = s->last_pos[i].pos; + } else + c->pos = 0; /* Out of RID entries. 
*/ +} + +/*------------------------------------------------------------------------* + * hid_start_parse + *------------------------------------------------------------------------*/ hid_data_t -hid_start_parse(report_desc_t d, int kindset, int id) +hid_start_parse(report_desc_t d, int kindset, int id __unused) { struct hid_data *s; @@ -98,213 +152,207 @@ hid_start_parse(report_desc_t d, int kindset, int id) s->start = s->p = d->data; s->end = d->data + d->size; s->kindset = kindset; - s->reportid = id; - s->hassavedcoll = 0; return (s); } +/*------------------------------------------------------------------------* + * hid_end_parse + *------------------------------------------------------------------------*/ void hid_end_parse(hid_data_t s) { - while (s->cur.next) { - hid_item_t *hi = s->cur.next->next; - free(s->cur.next); - s->cur.next = hi; - } + + if (s == NULL) + return; + free(s); } -int -hid_get_item(hid_data_t s, hid_item_t *h) +/*------------------------------------------------------------------------* + * get byte from HID descriptor + *------------------------------------------------------------------------*/ +static uint8_t +hid_get_byte(struct hid_data *s, const uint16_t wSize) { - int r; + const uint8_t *ptr; + uint8_t retval; - for (;;) { - r = hid_get_item_raw(s, h); - if (r <= 0) - break; - if (h->report_ID == s->reportid || s->reportid == -1) - break; - } - return (r); + ptr = s->p; + + /* check if end is reached */ + if (ptr == s->end) + return (0); + + /* read out a byte */ + retval = *ptr; + + /* check if data pointer can be advanced by "wSize" bytes */ + if ((s->end - ptr) < wSize) + ptr = s->end; + else + ptr += wSize; + + /* update pointer */ + s->p = ptr; + + return (retval); } -#define REPORT_SAVED_COLL \ - do { \ - if (s->hassavedcoll) { \ - *h = s->savedcoll; \ - h->report_ID = c->report_ID; \ - s->hassavedcoll = 0; \ - return (1); \ - } \ - } while(/*LINTED*/ 0) - -static int -hid_get_item_raw(hid_data_t s, hid_item_t *h) +/*------------------------------------------------------------------------* + * hid_get_item + *------------------------------------------------------------------------*/ +int +hid_get_item(hid_data_t s, hid_item_t *h) { hid_item_t *c; - unsigned int bTag = 0, bType = 0, bSize; - unsigned char *data; - int dval; - unsigned char *p; - hid_item_t *hi; - hid_item_t nc; - int i; - hid_kind_t retkind; + unsigned int bTag, bType, bSize; + uint32_t oldpos; + int32_t mask; + int32_t dval; + + if (s == NULL) + return (0); - c = &s->cur; + c = &s->cur[s->pushlevel]; top: - if (s->multimax) { - REPORT_SAVED_COLL; - if (c->logical_minimum >= c->logical_maximum) { - if (s->logminsize == 1) - c->logical_minimum =(int8_t)c->logical_minimum; - else if (s->logminsize == 2) - c->logical_minimum =(int16_t)c->logical_minimum; + /* check if there is an array of items */ + if (s->icount < s->ncount) { + /* get current usage */ + if (s->iusage < s->nusage) { + dval = s->usages_min[s->iusage] + s->ousage; + c->usage = dval; + s->usage_last = dval; + if (dval == s->usages_max[s->iusage]) { + s->iusage ++; + s->ousage = 0; + } else { + s->ousage ++; + } + } else { + /* Using last usage */ + dval = s->usage_last; } - if (s->multi < s->multimax) { - c->usage = s->usages[min(s->multi, s->nusage-1)]; - s->multi++; + s->icount ++; + /* + * Only copy HID item, increment position and return + * if correct kindset! 
+ */ + if (s->kindset & (1 << c->kind)) { *h = *c; - /* - * 'multimax' is only non-zero if the current - * item kind is input/output/feature - */ - h->pos = s->kindpos[c->kind]; - s->kindpos[c->kind] += c->report_size; - h->next = 0; + c->pos += c->report_size * c->report_count; return (1); - } else { - c->report_count = s->multimax; - s->multimax = 0; - s->nusage = 0; - hid_clear_local(c); } } - for (;;) { - p = s->p; - if (p >= s->end) - return (0); - bSize = *p++; + /* reset state variables */ + s->icount = 0; + s->ncount = 0; + s->iusage = 0; + s->nusage = 0; + s->susage = 0; + s->ousage = 0; + hid_clear_local(c); + + /* get next item */ + while (s->p != s->end) { + + bSize = hid_get_byte(s, 1); if (bSize == 0xfe) { /* long item */ - bSize = *p++; - bSize |= *p++ << 8; - bTag = *p++; - data = p; - p += bSize; + bSize = hid_get_byte(s, 1); + bSize |= hid_get_byte(s, 1) << 8; + bTag = hid_get_byte(s, 1); + bType = 0xff; /* XXX what should it be */ } else { /* short item */ bTag = bSize >> 4; bType = (bSize >> 2) & 3; bSize &= 3; - if (bSize == 3) bSize = 4; - data = p; - p += bSize; + if (bSize == 3) + bSize = 4; } - s->p = p; - /* - * The spec is unclear if the data is signed or unsigned. - */ + switch(bSize) { case 0: dval = 0; + mask = 0; break; case 1: - dval = *data++; + dval = (int8_t)hid_get_byte(s, 1); + mask = 0xFF; break; case 2: - dval = *data++; - dval |= *data++ << 8; + dval = hid_get_byte(s, 1); + dval |= hid_get_byte(s, 1) << 8; + dval = (int16_t)dval; + mask = 0xFFFF; break; case 4: - dval = *data++; - dval |= *data++ << 8; - dval |= *data++ << 16; - dval |= *data++ << 24; + dval = hid_get_byte(s, 1); + dval |= hid_get_byte(s, 1) << 8; + dval |= hid_get_byte(s, 1) << 16; + dval |= hid_get_byte(s, 1) << 24; + mask = 0xFFFFFFFF; break; default: - return (-1); + dval = hid_get_byte(s, bSize); + continue; } switch (bType) { - case 0: /* Main */ + case 0: /* Main */ switch (bTag) { - case 8: /* Input */ - retkind = hid_input; - ret: - if (!(s->kindset & (1 << retkind))) { - /* Drop the items of this kind */ - s->nusage = 0; - continue; - } - c->kind = retkind; + case 8: /* Input */ + c->kind = hid_input; c->flags = dval; + ret: + c->report_count = s->loc_count; + c->report_size = s->loc_size; + if (c->flags & HIO_VARIABLE) { - s->multimax = c->report_count; - s->multi = 0; + /* range check usage count */ + if (c->report_count > 255) { + s->ncount = 255; + } else + s->ncount = c->report_count; + + /* + * The "top" loop will return + * one and one item: + */ c->report_count = 1; - if (s->minset) { - for (i = c->usage_minimum; - i <= c->usage_maximum; - i++) { - s->usages[s->nusage] = i; - if (s->nusage < MAXUSAGE-1) - s->nusage++; - } - c->usage_minimum = 0; - c->usage_maximum = 0; - s->minset = 0; - } - goto top; } else { - if (s->minset) - c->usage = c->usage_minimum; - *h = *c; - h->next = 0; - h->pos = s->kindpos[c->kind]; - s->kindpos[c->kind] += - c->report_size * c->report_count; - hid_clear_local(c); - s->minset = 0; - return (1); + s->ncount = 1; } - case 9: /* Output */ - retkind = hid_output; + goto top; + + case 9: /* Output */ + c->kind = hid_output; + c->flags = dval; goto ret; case 10: /* Collection */ c->kind = hid_collection; c->collection = dval; c->collevel++; - nc = *c; - hid_clear_local(c); - /*c->report_ID = NO_REPORT_ID;*/ - s->nusage = 0; - if (s->hassavedcoll) { - *h = s->savedcoll; - h->report_ID = nc.report_ID; - s->savedcoll = nc; - return (1); - } else { - s->hassavedcoll = 1; - s->savedcoll = nc; - } - break; + c->usage = s->usage_last; + *h = *c; + 
return (1); case 11: /* Feature */ - retkind = hid_feature; + c->kind = hid_feature; + c->flags = dval; goto ret; case 12: /* End collection */ - REPORT_SAVED_COLL; c->kind = hid_endcollection; + if (c->collevel == 0) { + /* Invalid end collection. */ + return (0); + } c->collevel--; *h = *c; - /*hid_clear_local(c);*/ - s->nusage = 0; return (1); default: - return (-2); + break; } break; @@ -315,13 +363,12 @@ hid_get_item_raw(hid_data_t s, hid_item_t *h) break; case 1: c->logical_minimum = dval; - s->logminsize = bSize; break; case 2: c->logical_maximum = dval; break; case 3: - c->physical_maximum = dval; + c->physical_minimum = dval; break; case 4: c->physical_maximum = dval; @@ -333,45 +380,97 @@ hid_get_item_raw(hid_data_t s, hid_item_t *h) c->unit = dval; break; case 7: - c->report_size = dval; + /* mask because value is unsigned */ + s->loc_size = dval & mask; break; case 8: - c->report_ID = dval; - s->kindpos[hid_input] = - s->kindpos[hid_output] = - s->kindpos[hid_feature] = 0; + hid_switch_rid(s, c, dval); break; case 9: - c->report_count = dval; + /* mask because value is unsigned */ + s->loc_count = dval & mask; break; - case 10: /* Push */ - hi = malloc(sizeof *hi); - *hi = s->cur; - c->next = hi; + case 10: /* Push */ + s->pushlevel ++; + if (s->pushlevel < MAXPUSH) { + s->cur[s->pushlevel] = *c; + /* store size and count */ + c->report_size = s->loc_size; + c->report_count = s->loc_count; + /* update current item pointer */ + c = &s->cur[s->pushlevel]; + } break; - case 11: /* Pop */ - hi = c->next; - s->cur = *hi; - free(hi); + case 11: /* Pop */ + s->pushlevel --; + if (s->pushlevel < MAXPUSH) { + /* preserve position */ + oldpos = c->pos; + c = &s->cur[s->pushlevel]; + /* restore size and count */ + s->loc_size = c->report_size; + s->loc_count = c->report_count; + /* set default item location */ + c->pos = oldpos; + c->report_size = 0; + c->report_count = 0; + } break; default: - return (-3); + break; } break; case 2: /* Local */ switch (bTag) { case 0: - c->usage = c->_usage_page | dval; - if (s->nusage < MAXUSAGE) - s->usages[s->nusage++] = c->usage; + if (bSize != 4) + dval = (dval & mask) | c->_usage_page; + + /* set last usage, in case of a collection */ + s->usage_last = dval; + + if (s->nusage < MAXUSAGE) { + s->usages_min[s->nusage] = dval; + s->usages_max[s->nusage] = dval; + s->nusage ++; + } /* else XXX */ + + /* clear any pending usage sets */ + s->susage = 0; break; case 1: - s->minset = 1; - c->usage_minimum = c->_usage_page | dval; - break; + s->susage |= 1; + + if (bSize != 4) + dval = (dval & mask) | c->_usage_page; + c->usage_minimum = dval; + + goto check_set; case 2: - c->usage_maximum = c->_usage_page | dval; + s->susage |= 2; + + if (bSize != 4) + dval = (dval & mask) | c->_usage_page; + c->usage_maximum = dval; + + check_set: + if (s->susage != 3) + break; + + /* sanity check */ + if ((s->nusage < MAXUSAGE) && + (c->usage_minimum <= c->usage_maximum)) { + /* add usage range */ + s->usages_min[s->nusage] = + c->usage_minimum; + s->usages_max[s->nusage] = + c->usage_maximum; + s->nusage ++; + } + /* else XXX */ + + s->susage = 0; break; case 3: c->designator_index = dval; @@ -395,40 +494,63 @@ hid_get_item_raw(hid_data_t s, hid_item_t *h) c->set_delimiter = dval; break; default: - return (-4); + break; } break; default: - return (-5); + break; } } + return (0); } int hid_report_size(report_desc_t r, enum hid_kind k, int id) { struct hid_data *d; - hid_item_t h; - int size; + struct hid_item h; + uint32_t temp; + uint32_t hpos; + uint32_t lpos; + + hpos 
= 0; + lpos = 0xFFFFFFFF; memset(&h, 0, sizeof h); - size = 0; - for (d = hid_start_parse(r, 1<kindpos[k]; + /* compute minimum */ + if (lpos > h.pos) + lpos = h.pos; + /* compute end position */ + temp = h.pos + (h.report_size * h.report_count); + /* compute maximum */ + if (hpos < temp) + hpos = temp; } } hid_end_parse(d); - return ((size + 7) / 8); + + /* safety check - can happen in case of currupt descriptors */ + if (lpos > hpos) + temp = 0; + else + temp = hpos - lpos; + + if (id) + temp += 8; + + /* return length in bytes rounded up */ + return ((temp + 7) / 8); } int hid_locate(report_desc_t desc, unsigned int u, enum hid_kind k, hid_item_t *h, int id) { - hid_data_t d; + struct hid_data *d; - for (d = hid_start_parse(desc, 1<kind == k && !(h->flags & HIO_CONST) && h->usage == u) { hid_end_parse(d); return (1); diff --git a/lib/libusbhid/usage.c b/lib/libusbhid/usage.c index 280ef69..eeff818 100644 --- a/lib/libusbhid/usage.c +++ b/lib/libusbhid/usage.c @@ -29,6 +29,7 @@ #include __FBSDID("$FreeBSD$"); +#include #include #include #include diff --git a/lib/libusbhid/usbhid.h b/lib/libusbhid/usbhid.h index 90f0f03..df5f7c1 100644 --- a/lib/libusbhid/usbhid.h +++ b/lib/libusbhid/usbhid.h @@ -29,7 +29,7 @@ * */ -#include +#include #include typedef struct report_desc *report_desc_t; @@ -37,45 +37,41 @@ typedef struct report_desc *report_desc_t; typedef struct hid_data *hid_data_t; typedef enum hid_kind { - hid_input = 0, - hid_output = 1, - hid_feature = 2, - hid_collection, - hid_endcollection + hid_input, hid_output, hid_feature, hid_collection, hid_endcollection } hid_kind_t; typedef struct hid_item { /* Global */ - unsigned int _usage_page; - int logical_minimum; - int logical_maximum; - int physical_minimum; - int physical_maximum; - int unit_exponent; - int unit; - int report_size; - int report_ID; + uint32_t _usage_page; + int32_t logical_minimum; + int32_t logical_maximum; + int32_t physical_minimum; + int32_t physical_maximum; + int32_t unit_exponent; + int32_t unit; + int32_t report_size; + int32_t report_ID; #define NO_REPORT_ID 0 - int report_count; + int32_t report_count; /* Local */ - unsigned int usage; - int usage_minimum; - int usage_maximum; - int designator_index; - int designator_minimum; - int designator_maximum; - int string_index; - int string_minimum; - int string_maximum; - int set_delimiter; + uint32_t usage; + int32_t usage_minimum; + int32_t usage_maximum; + int32_t designator_index; + int32_t designator_minimum; + int32_t designator_maximum; + int32_t string_index; + int32_t string_minimum; + int32_t string_maximum; + int32_t set_delimiter; /* Misc */ - int collection; - int collevel; + int32_t collection; + int collevel; enum hid_kind kind; - unsigned int flags; - /* Absolute data position (bits) */ - unsigned int pos; - /* */ + uint32_t flags; + /* Location */ + uint32_t pos; + /* unused */ struct hid_item *next; } hid_item_t; @@ -96,7 +92,8 @@ hid_data_t hid_start_parse(report_desc_t d, int kindset, int id); void hid_end_parse(hid_data_t s); int hid_get_item(hid_data_t s, hid_item_t *h); int hid_report_size(report_desc_t d, enum hid_kind k, int id); -int hid_locate(report_desc_t d, unsigned int usage, enum hid_kind k, hid_item_t *h, int id); +int hid_locate(report_desc_t d, unsigned int usage, enum hid_kind k, + hid_item_t *h, int id); /* Conversion to/from usage names, usage.c: */ const char *hid_usage_page(int i); diff --git a/lib/libusbhid/usbvar.h b/lib/libusbhid/usbvar.h index d437005..9605106 100644 --- a/lib/libusbhid/usbvar.h +++ 
b/lib/libusbhid/usbvar.h @@ -30,8 +30,8 @@ */ struct report_desc { - unsigned int size; - unsigned char data[1]; + uint32_t size; + uint8_t data[1]; }; /* internal backwards compatibility functions */ -- cgit v1.1 From 66f9b83110b17ba68a936b1076cc39796bdbad2d Mon Sep 17 00:00:00 2001 From: gordon Date: Wed, 19 May 2010 22:03:45 +0000 Subject: MFC r207981: Fix a bug due to a type conversion from 64 to 32 bits. The side effect of this type conversion is that the high bits, which were used to indicate whether a special character was a literal or special, were dropped. As a result, all special characters were treated as special, even if they were supposed to be literals. Approved by: mentor (wes@) --- lib/libc/gen/glob.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c index 32af5db..c3d9f08 100644 --- a/lib/libc/gen/glob.c +++ b/lib/libc/gen/glob.c @@ -433,9 +433,9 @@ static int glob0(const Char *pattern, glob_t *pglob, size_t *limit) { const Char *qpatnext; - int c, err; + int err; size_t oldpathc; - Char *bufnext, patbuf[MAXPATHLEN]; + Char *bufnext, c, patbuf[MAXPATHLEN]; qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); oldpathc = pglob->gl_pathc; -- cgit v1.1 From c6cf476a04a39801e182bd55572c1f23bdc6860f Mon Sep 17 00:00:00 2001 From: trasz Date: Thu, 20 May 2010 18:45:07 +0000 Subject: MFC r208033: Make it possible to actually use NFSv4 permission bits with acl_set_perm(3) and acl_delete_perm(3). It went undetected because neither setfacl(1) nor Samba uses these routines. D'oh. --- lib/libc/posix1e/acl_perm.c | 52 ++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/libc/posix1e/acl_perm.c b/lib/libc/posix1e/acl_perm.c index 37d29d6..b5108ca 100644 --- a/lib/libc/posix1e/acl_perm.c +++ b/lib/libc/posix1e/acl_perm.c @@ -35,6 +35,20 @@ __FBSDID("$FreeBSD$"); #include #include +static int +_perm_is_invalid(acl_perm_t perm) +{ + + /* Check if more than a single bit is set. */ + if ((perm & -perm) == perm && + (perm & (ACL_POSIX1E_BITS | ACL_NFS4_PERM_BITS)) == perm) + return (0); + + errno = EINVAL; + + return (1); +} + /* * acl_add_perm() (23.4.1): add the permission contained in perm to the * permission set permset_d */ int acl_add_perm(acl_permset_t permset_d, acl_perm_t perm) { - if (permset_d) { - switch(perm) { - case ACL_READ: - case ACL_WRITE: - case ACL_EXECUTE: - *permset_d |= perm; - return (0); - } + if (permset_d == NULL) { + errno = EINVAL; + return (-1); } - errno = EINVAL; - return (-1); + if (_perm_is_invalid(perm)) + return (-1); + + *permset_d |= perm; + + return (0); } /* @@ -83,16 +96,15 @@ int acl_delete_perm(acl_permset_t permset_d, acl_perm_t perm) { - if (permset_d) { - switch(perm) { - case ACL_READ: - case ACL_WRITE: - case ACL_EXECUTE: - *permset_d &= ~(perm & ACL_PERM_BITS); - return (0); - } + if (permset_d == NULL) { + errno = EINVAL; + return (-1); } - errno = EINVAL; - return (-1); + if (_perm_is_invalid(perm)) + return (-1); + + *permset_d &= ~perm; + + return (0); } -- cgit v1.1 From 98afbc1457cefd1e8de545be6151baacadbf37dc Mon Sep 17 00:00:00 2001 From: trasz Date: Thu, 20 May 2010 18:46:11 +0000 Subject: MFC r208034: Make branding less intrusive - in acl_set(3), in case ACL brand is ACL_BRAND_UNKNOWN, do what the programmer says instead of failing.
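Taken together with the acl_perm(3) change above, the NFSv4 path now works end to end; a short sketch (illustrative only, with error handling trimmed; it assumes path carries an NFSv4-branded ACL):

	#include <sys/types.h>
	#include <sys/acl.h>

	static int
	allow_read(const char *path)
	{
		acl_t acl;
		acl_entry_t entry;
		acl_permset_t perms;

		if ((acl = acl_get_file(path, ACL_TYPE_NFS4)) == NULL)
			return (-1);
		/* Add an NFSv4 permission bit to the first entry. */
		if (acl_get_entry(acl, ACL_FIRST_ENTRY, &entry) == 1 &&
		    acl_get_permset(entry, &perms) == 0 &&
		    acl_add_perm(perms, ACL_READ_DATA) == 0)
			(void)acl_set_file(path, ACL_TYPE_NFS4, acl);
		acl_free(acl);
		return (0);
	}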
--- lib/libc/posix1e/acl_branding.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/libc/posix1e/acl_branding.c b/lib/libc/posix1e/acl_branding.c index 9839477..84b0063 100644 --- a/lib/libc/posix1e/acl_branding.c +++ b/lib/libc/posix1e/acl_branding.c @@ -129,6 +129,9 @@ _acl_type_not_valid_for_acl(const acl_t acl, acl_type_t type) if (type == ACL_TYPE_ACCESS || type == ACL_TYPE_DEFAULT) return (0); break; + + case ACL_BRAND_UNKNOWN: + return (0); } return (-1); -- cgit v1.1 From f70ec033b6d3a079e307f66ef62c8b1274671dbf Mon Sep 17 00:00:00 2001 From: marius Date: Sun, 23 May 2010 11:18:05 +0000 Subject: MFC: r208341 Update to a config.h created by a file 5.03 configure script. This causes file.1 to contain the correct version number and SIZEOF_LONG_LONG to be defined as appropriate, which is crucial for 64-bit big-endian ELF files to be handled correctly on big-endian systems. PR: 146387 Reviewed by: delphij --- lib/libmagic/config.h | 95 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/libmagic/config.h b/lib/libmagic/config.h index 1e97361..8574ddc 100644 --- a/lib/libmagic/config.h +++ b/lib/libmagic/config.h @@ -1,18 +1,18 @@ /* $FreeBSD$ */ /* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.in by autoheader. */ +/* config.h.in. Generated from configure.ac by autoheader. */ -/* Use the builtin ELF recognition code */ +/* Define in built-in ELF support is used */ #define BUILTIN_ELF 1 -/* Recognize ELF core files */ +/* Define for ELF core file support */ #define ELFCORE 1 /* Define to 1 if you have the `asprintf' function. */ #define HAVE_ASPRINTF 1 -/* */ +/* HAVE_DAYLIGHT */ /* #undef HAVE_DAYLIGHT */ /* Define to 1 if you have the header file. */ @@ -51,9 +51,6 @@ /* Define to 1 if you have the header file. */ #define HAVE_LOCALE_H 1 -/* */ -#define HAVE_LONG_LONG 1 - /* Define to 1 if you have the `mbrtowc' function. */ #define HAVE_MBRTOWC 1 @@ -69,9 +66,6 @@ /* Define to 1 if you have the `mmap' function. */ #define HAVE_MMAP 1 -/* Define to 1 if you have the `snprintf' function. */ -#define HAVE_SNPRINTF 1 - /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 @@ -102,13 +96,12 @@ /* Define to 1 if you have the `strtoul' function. */ #define HAVE_STRTOUL 1 +/* HAVE_STRUCT_OPTION */ +#define HAVE_STRUCT_OPTION 1 + /* Define to 1 if `st_rdev' is member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_RDEV 1 -/* Define to 1 if your `struct stat' has `st_rdev'. Deprecated, use - `HAVE_STRUCT_STAT_ST_RDEV' instead. */ -#define HAVE_ST_RDEV 1 - /* Define to 1 if `tm_gmtoff' is member of `struct tm'. */ #define HAVE_STRUCT_TM_TM_GMTOFF 1 @@ -133,7 +126,7 @@ /* Define to 1 if you have that is POSIX.1 compatible. */ #define HAVE_SYS_WAIT_H 1 -/* */ +/* HAVE_TM_ISDST */ #define HAVE_TM_ISDST 1 /* HAVE_TM_ZONE */ @@ -169,9 +162,6 @@ /* Define to 1 if you have the `vasprintf' function. */ #define HAVE_VASPRINTF 1 -/* Define to 1 if you have the `vsnprintf' function. */ -#define HAVE_VSNPRINTF 1 - /* Define to 1 if you have the header file. */ #define HAVE_WCHAR_H 1 @@ -199,34 +189,22 @@ #define PACKAGE "file" /* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" +#define PACKAGE_BUGREPORT "christos@astron.com" /* Define to the full name of this package. 
*/ -#define PACKAGE_NAME "" +#define PACKAGE_NAME "file" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "" +#define PACKAGE_STRING "file 5.03" /* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "" +#define PACKAGE_TARNAME "file" /* Define to the version of this package. */ -#define PACKAGE_VERSION VERSION - -/* */ -#define SIZEOF_INT64_T 8 +#define PACKAGE_VERSION "5.03" -/* */ -#define SIZEOF_UINT16_T 2 - -/* */ -#define SIZEOF_UINT32_T 4 - -/* */ -#define SIZEOF_UINT64_T 8 - -/* */ -#define SIZEOF_UINT8_T 1 +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 @@ -235,7 +213,7 @@ /* #undef TM_IN_SYS_TIME */ /* Version number of package */ -#define VERSION "5.00" +#define VERSION "5.03" /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ @@ -247,8 +225,49 @@ #endif #endif +/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ +/* #undef _LARGEFILE_SOURCE */ + /* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ + +/* Define to a type if does not define. */ +/* #undef mbstate_t */ + +/* Define to `long int' if does not define. */ +/* #undef off_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + + +#ifndef HAVE_UINT8_T +typedef unsigned char uint8_t; +#endif +#ifndef HAVE_UINT16_T +typedef unsigned short uint16_t; +#endif +#ifndef HAVE_UINT32_T +typedef unsigned int uint32_t; +#endif +#ifndef HAVE_INT32_T +typedef int int32_t; +#endif +#ifndef HAVE_UINT64_T +#if SIZEOF_LONG_LONG == 8 +typedef unsigned long long uint64_t; +#else +typedef unsigned long uint64_t; +#endif +#endif +#ifndef HAVE_INT64_T +#if SIZEOF_LONG_LONG == 8 +typedef long long int64_t; +#else +typedef long int64_t; +#endif +#endif + -- cgit v1.1 From 53d0fa35580ecc3df5f178617ddf8d12849a0690 Mon Sep 17 00:00:00 2001 From: rstone Date: Mon, 24 May 2010 19:42:27 +0000 Subject: MFC r207482 When configuring hwpmc to use the EXT_SNOOP event, only send a default cachestate qualifier on the Atom processor. Other Intel processors do not accept a cachestate qualifier and currently hwpmc will return EINVAL if you try to use the EXT_SNOOP event on those processors Approved by: emaste (mentor) --- lib/libpmc/libpmc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 929ca73..551fe12 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -716,9 +716,16 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec, case PMC_EV_IAP_EVENT_40H: /* Core */ case PMC_EV_IAP_EVENT_41H: /* Core */ case PMC_EV_IAP_EVENT_42H: /* Core, Core2, Atom */ - case PMC_EV_IAP_EVENT_77H: /* Core */ if (cachestate == 0) cachestate = (0xF << 8); + break; + case PMC_EV_IAP_EVENT_77H: /* Atom */ + /* IAP_EVENT_77H only accepts a cachestate qualifier on the + * Atom processor + */ + if(cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM && cachestate == 0) + cachestate = (0xF << 8); + break; default: break; } -- cgit v1.1 From cecdfcbeea0822ed4b1ce847da15efb757dfa5d5 Mon Sep 17 00:00:00 2001 From: cperciva Date: Thu, 27 May 2010 03:15:04 +0000 Subject: Change the current working directory to be inside the jail created by the jail(8) command. [10:04] Fix a one-NUL-byte buffer overflow in libopie. 
[10:05] Correctly sanity-check a buffer length in nfs mount. [10:06] Approved by: so (cperciva) Approved by: re (kensmith) Security: FreeBSD-SA-10:04.jail Security: FreeBSD-SA-10:05.opie Security: FreeBSD-SA-10:06.nfsclient --- lib/libc/sys/mount.2 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/sys/mount.2 b/lib/libc/sys/mount.2 index 6ce2d4d..3d48f41 100644 --- a/lib/libc/sys/mount.2 +++ b/lib/libc/sys/mount.2 @@ -107,7 +107,7 @@ This restriction can be removed by setting the .Va vfs.usermount .Xr sysctl 8 variable -to a non-zero value. +to a non-zero value; see the BUGS section for more information. .Pp The following .Fa flags @@ -370,3 +370,10 @@ functions appeared in .At v6 . .Sh BUGS Some of the error codes need translation to more obvious messages. +.Pp +Allowing untrusted users to mount arbitrary media, e.g. by enabling +.Va vfs.usermount , +should not be considered safe. +Most file systems in +.Fx +were not built to safeguard against malicious devices. -- cgit v1.1 From 3c153e0b018436cca9346857ea8a99ab379b01ea Mon Sep 17 00:00:00 2001 From: uqs Date: Sun, 30 May 2010 08:38:41 +0000 Subject: MFC r208493: These features will first appear in 8.1, soon to be released Approved by: re (kib) --- lib/libc/compat-43/sigpause.2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/compat-43/sigpause.2 b/lib/libc/compat-43/sigpause.2 index bf3cf0b..0bea04e 100644 --- a/lib/libc/compat-43/sigpause.2 +++ b/lib/libc/compat-43/sigpause.2 @@ -237,5 +237,5 @@ function appeared in .Bx 4.2 and has been deprecated. All other functions appeared in -.Fx 9.0 +.Fx 8.1 and were deprecated before being implemented. -- cgit v1.1 From ae44b4322ea606485db5239ff83cdf784e472941 Mon Sep 17 00:00:00 2001 From: rwatson Date: Tue, 1 Jun 2010 14:09:35 +0000 Subject: Merge r204435 from head to stable/8: Fix typo in comment. Approved by: re (bz) --- lib/libkvm/kvm_vnet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libkvm/kvm_vnet.c b/lib/libkvm/kvm_vnet.c index d192c67..a5c8aad 100644 --- a/lib/libkvm/kvm_vnet.c +++ b/lib/libkvm/kvm_vnet.c @@ -117,8 +117,8 @@ _kvm_vnet_selectpid(kvm_t *kd, pid_t pid) } /* - * First, find the process for this pid. If we are workig on a dump, - * either locate the thread dumptid is refering to or proc0. + * First, find the process for this pid. If we are working on a + * dump, either locate the thread dumptid is refering to or proc0. * Based on either, take the address of the ucred. */ credp = 0; -- cgit v1.1 From 2d5cd857113bfc9af9145ac6ad2e8cf7923dc8a9 Mon Sep 17 00:00:00 2001 From: trasz Date: Wed, 2 Jun 2010 20:35:56 +0000 Subject: MFC r208437: Make acl_get_perm_np(3) work with NFSv4 ACLs. 
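(A hedged usage sketch, not taken from the commit: with this change an NFSv4 permission bit such as ACL_READ_DATA can be tested through the same call that already handled the POSIX.1e bits. The helper name is a placeholder, and 'entry' is assumed to come from an NFSv4-branded ACL.)

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/acl.h>

	/* Sketch: query one NFSv4 permission bit on an ACL entry. */
	static void
	show_read_data(acl_entry_t entry)
	{
		acl_permset_t ps;

		if (acl_get_permset(entry, &ps) == 0 &&
		    acl_get_perm_np(ps, ACL_READ_DATA) == 1)
			printf("entry grants read_data\n");
	}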
Reviewed by: kientzle@ Approved by: re (kib) --- lib/libc/posix1e/acl_get.c | 24 ------------------------ lib/libc/posix1e/acl_perm.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/libc/posix1e/acl_get.c b/lib/libc/posix1e/acl_get.c index 8a77ab1..b323501 100644 --- a/lib/libc/posix1e/acl_get.c +++ b/lib/libc/posix1e/acl_get.c @@ -132,30 +132,6 @@ acl_get_fd_np(int fd, acl_type_t type) return (aclp); } -int -acl_get_perm_np(acl_permset_t permset_d, acl_perm_t perm) -{ - - if (permset_d == NULL) { - errno = EINVAL; - return (-1); - } - - switch(perm) { - case ACL_READ: - case ACL_WRITE: - case ACL_EXECUTE: - if (*permset_d & perm) - return (1); - break; - default: - errno = EINVAL; - return (-1); - } - - return (0); -} - /* * acl_get_permset() (23.4.17): return via permset_p a descriptor to * the permission set in the ACL entry entry_d. diff --git a/lib/libc/posix1e/acl_perm.c b/lib/libc/posix1e/acl_perm.c index b5108ca..ad2f47e 100644 --- a/lib/libc/posix1e/acl_perm.c +++ b/lib/libc/posix1e/acl_perm.c @@ -108,3 +108,21 @@ acl_delete_perm(acl_permset_t permset_d, acl_perm_t perm) return (0); } + +int +acl_get_perm_np(acl_permset_t permset_d, acl_perm_t perm) +{ + + if (permset_d == NULL) { + errno = EINVAL; + return (-1); + } + + if (_perm_is_invalid(perm)) + return (-1); + + if (*permset_d & perm) + return (1); + + return (0); +} -- cgit v1.1 From 7ebb302abcbc0ab56797f28e24a3769665e41f3c Mon Sep 17 00:00:00 2001 From: brian Date: Wed, 9 Jun 2010 07:31:41 +0000 Subject: MFC r197477 - clean up keys deleted via pthread_key_delete() PR: 135462 Approved by: re (kib) Nod timeout: davidxu --- lib/libthr/thread/thr_spec.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libthr/thread/thr_spec.c b/lib/libthr/thread/thr_spec.c index 06d1287..0740d6b 100644 --- a/lib/libthr/thread/thr_spec.c +++ b/lib/libthr/thread/thr_spec.c @@ -131,9 +131,19 @@ _thread_cleanupspecific(void) curthread->specific[key].data = NULL; curthread->specific_data_count--; } + else if (curthread->specific[key].data != NULL) { + /* + * This can happen if the key is deleted via + * pthread_key_delete without first setting the value + * to NULL in all threads. POSIX says that the + * destructor is not invoked in this case. + */ + curthread->specific[key].data = NULL; + curthread->specific_data_count--; + } /* - * If there is a destructore, call it + * If there is a destructor, call it * with the key table entry unlocked: */ if (destructor != NULL) { -- cgit v1.1 From 14ae56e45249ff7263e0a4b2d5c298e8c6e38e23 Mon Sep 17 00:00:00 2001 From: trasz Date: Fri, 11 Jun 2010 15:21:12 +0000 Subject: Fix usage of uninitialized variable. Found with: Coverity Prevent CID: 7517 Approved by: re (kib) --- lib/libc/posix1e/acl_strip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/posix1e/acl_strip.c b/lib/libc/posix1e/acl_strip.c index 1dcdfcd..8aa850f 100644 --- a/lib/libc/posix1e/acl_strip.c +++ b/lib/libc/posix1e/acl_strip.c @@ -46,7 +46,7 @@ static acl_t _nfs4_acl_strip_np(const acl_t aclp, int recalculate_mask) { acl_t newacl; - mode_t mode; + mode_t mode = 0; newacl = acl_init(ACL_MAX_ENTRIES); if (newacl == NULL) { -- cgit v1.1 From 693ced2a4639e406f17a09ace2b38edb38d0f194 Mon Sep 17 00:00:00 2001 From: trasz Date: Fri, 11 Jun 2010 15:26:15 +0000 Subject: MFC r208811: Don't use pointer to 64 bit value (id_t) to point to 32 bit value (uid_t). 
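(An illustrative aside, not from the change itself: on FreeBSD id_t is a 64-bit type while uid_t is 32 bits, so dereferencing the qualifier through an id_t pointer overreads the stored object, roughly as in this contrived sketch.)

	#include <sys/types.h>

	/* Sketch of the bug class: q really points at a 4-byte uid_t. */
	static id_t
	broken_read(void)
	{
		uid_t stored = 100;	/* acl_get_qualifier() stores a uid_t */
		void *q = &stored;

		return (*(id_t *)q);	/* 8-byte read of a 4-byte object */
	}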
Found with: Coverity Prevent CID: 7466, 7467 Approved by: re (kib) --- lib/libc/posix1e/acl_to_text_nfs4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/libc/posix1e/acl_to_text_nfs4.c b/lib/libc/posix1e/acl_to_text_nfs4.c index 5d0aa31..2ea92d9 100644 --- a/lib/libc/posix1e/acl_to_text_nfs4.c +++ b/lib/libc/posix1e/acl_to_text_nfs4.c @@ -50,7 +50,7 @@ format_who(char *str, size_t size, const acl_entry_t entry, int numeric) acl_tag_t tag; struct passwd *pwd; struct group *grp; - id_t *id; + uid_t *id; error = acl_get_tag_type(entry, &tag); if (error) @@ -62,7 +62,7 @@ format_who(char *str, size_t size, const acl_entry_t entry, int numeric) break; case ACL_USER: - id = (id_t *)acl_get_qualifier(entry); + id = (uid_t *)acl_get_qualifier(entry); if (id == NULL) return (-1); /* XXX: Thread-unsafe. */ @@ -81,7 +81,7 @@ format_who(char *str, size_t size, const acl_entry_t entry, int numeric) break; case ACL_GROUP: - id = (id_t *)acl_get_qualifier(entry); + id = (uid_t *)acl_get_qualifier(entry); if (id == NULL) return (-1); /* XXX: Thread-unsafe. */ @@ -141,7 +141,7 @@ format_additional_id(char *str, size_t size, const acl_entry_t entry) { int error; acl_tag_t tag; - id_t *id; + uid_t *id; error = acl_get_tag_type(entry, &tag); if (error) @@ -155,7 +155,7 @@ format_additional_id(char *str, size_t size, const acl_entry_t entry) break; default: - id = (id_t *)acl_get_qualifier(entry); + id = (uid_t *)acl_get_qualifier(entry); if (id == NULL) return (-1); snprintf(str, size, ":%d", (unsigned int)*id); -- cgit v1.1 From 2b4920d05f55f6e6d79794be2b9d00c3001823e5 Mon Sep 17 00:00:00 2001 From: delphij Date: Sat, 12 Jun 2010 05:21:29 +0000 Subject: MFC r209078: Detect bit endianness through machine/endian.h. This fixes xz on big-endian systems. 
Tested on: sparc64 (kindly provided by linimon), amd64 Approved by: re (kensmith) --- lib/liblzma/config.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/liblzma/config.h b/lib/liblzma/config.h index 223cb95..e7b61bb 100644 --- a/lib/liblzma/config.h +++ b/lib/liblzma/config.h @@ -84,6 +84,12 @@ # define __EXTENSIONS__ 1 #endif #define VERSION "4.999.9beta" +#if defined(__FreeBSD__) +#include +#if _BYTE_ORDER == _BIG_ENDIAN +# define WORDS_BIGENDIAN 1 +#endif +#else #if defined AC_APPLE_UNIVERSAL_BUILD # if defined __BIG_ENDIAN__ # define WORDS_BIGENDIAN 1 @@ -93,3 +99,4 @@ /* # undef WORDS_BIGENDIAN */ # endif #endif +#endif -- cgit v1.1 From 1cceb68541d136b076fa2956a754b7c258258b2e Mon Sep 17 00:00:00 2001 From: des Date: Tue, 22 Jun 2010 19:56:07 +0000 Subject: merge r196650 from head (via stable/8): tty might be NULL Approved by: re (kib@) --- lib/libpam/modules/pam_lastlog/pam_lastlog.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/libpam/modules/pam_lastlog/pam_lastlog.c b/lib/libpam/modules/pam_lastlog/pam_lastlog.c index 817d1f3..9899f8d 100644 --- a/lib/libpam/modules/pam_lastlog/pam_lastlog.c +++ b/lib/libpam/modules/pam_lastlog/pam_lastlog.c @@ -183,6 +183,11 @@ pam_sm_close_session(pam_handle_t *pamh __unused, int flags __unused, pam_err = pam_get_item(pamh, PAM_TTY, (const void **)&tty); if (pam_err != PAM_SUCCESS) goto err; + if (tty == NULL) { + PAM_LOG("No PAM_TTY"); + pam_err = PAM_SERVICE_ERR; + goto err; + } if (strncmp(tty, _PATH_DEV, strlen(_PATH_DEV)) == 0) tty = (const char *)tty + strlen(_PATH_DEV); if (*(const char *)tty == '\0') -- cgit v1.1 From d45b7f14ae6fa78882fa9ec3be976733ca4767b4 Mon Sep 17 00:00:00 2001 From: grehan Date: Fri, 13 May 2011 04:54:01 +0000 Subject: Import of bhyve hypervisor and utilities, part 1. vmm.ko - kernel module for VT-x, VT-d and hypervisor control bhyve - user-space sequencer and i/o emulation vmmctl - dump of hypervisor register state libvmm - front-end to vmm.ko chardev interface bhyve was designed and implemented by Neel Natu. 
Thanks to the following folk from NetApp who helped to make this available: Joe CaraDonna Peter Snyder Jeff Heller Sandeep Mann Steve Miller Brian Pawlowski --- lib/Makefile | 2 + lib/libvmmapi/Makefile | 9 + lib/libvmmapi/mptable.c | 336 +++++++++++++++++++++ lib/libvmmapi/mptable.h | 171 +++++++++++ lib/libvmmapi/vmmapi.c | 647 +++++++++++++++++++++++++++++++++++++++++ lib/libvmmapi/vmmapi.h | 98 +++++++ lib/libvmmapi/vmmapi_freebsd.c | 187 ++++++++++++ 7 files changed, 1450 insertions(+) create mode 100644 lib/libvmmapi/Makefile create mode 100644 lib/libvmmapi/mptable.c create mode 100644 lib/libvmmapi/mptable.h create mode 100644 lib/libvmmapi/vmmapi.c create mode 100644 lib/libvmmapi/vmmapi.h create mode 100644 lib/libvmmapi/vmmapi_freebsd.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index e9f4ec3..8f33206 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,7 @@ SUBDIR= ${SUBDIR_ORDERED} \ ${_libusbhid} \ ${_libusb} \ ${_libvgl} \ + ${_libvmmapi} \ libwrap \ liby \ libz \ @@ -177,6 +178,7 @@ _libncp= libncp .endif _libsmb= libsmb _libvgl= libvgl +_libvmmapi= libvmmapi .endif .if ${MACHINE_ARCH} == "powerpc" diff --git a/lib/libvmmapi/Makefile b/lib/libvmmapi/Makefile new file mode 100644 index 0000000..492391f --- /dev/null +++ b/lib/libvmmapi/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +LIB= vmmapi +SRCS= vmmapi.c vmmapi_freebsd.c mptable.c +INCS= vmmapi.h + +CFLAGS+= -I${.CURDIR} + +.include diff --git a/lib/libvmmapi/mptable.c b/lib/libvmmapi/mptable.c new file mode 100644 index 0000000..1aea61a --- /dev/null +++ b/lib/libvmmapi/mptable.c @@ -0,0 +1,336 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include + +#include "vmmapi.h" +#include "mptable.h" + +#define LAPIC_PADDR (0xFEE00000) +#define LAPIC_VERSION (16) + +#define IOAPIC_PADDR (0xFEC00000) +#define IOAPIC_VERSION (0x11) + +extern int errno; + +static uint8_t +mp_compute_checksum(void *base, size_t len) +{ + uint8_t *bytes = base; + uint8_t sum = 0; + for(; len > 0; len--) { + sum += *bytes++; + } + return 256 - sum; +} + +static void +mp_build_mpfp(struct mp_floating_pointer *mpfp, vm_paddr_t mpfp_gpa) +{ + memset(mpfp, 0, sizeof(*mpfp)); + memcpy(mpfp->signature, MPFP_SIGNATURE, MPFP_SIGNATURE_LEN); + mpfp->mptable_paddr = mpfp_gpa + sizeof(*mpfp); + mpfp->specrev = MP_SPECREV; + mpfp->feature2 = 0; + mpfp->checksum = mp_compute_checksum(mpfp, sizeof(*mpfp)); +} + +static void +mp_build_mpch(struct mp_config_hdr *mpch) +{ + memset(mpch, 0, sizeof(*mpch)); + mpch->specrev = MP_SPECREV; + memcpy(mpch->signature, MPCH_SIGNATURE, MPCH_SIGNATURE_LEN); + memcpy(mpch->oemid, MPCH_OEMID, MPCH_OEMID_LEN); + memcpy(mpch->prodid, MPCH_PRODID, MPCH_PRODID_LEN); + mpch->lapic_paddr = LAPIC_PADDR; + + +} + +static void +mp_build_proc_entries(struct mpe_proc *mpep, int num_proc) +{ + int i; + + for (i = 0; i < num_proc; i++) { + memset(mpep, 0, sizeof(*mpep)); + mpep->entry_type = MP_ENTRY_PROC; + mpep->lapic_id = i; // XXX + mpep->lapic_version = LAPIC_VERSION; + mpep->proc_flags = (i == 0)?MPEP_FLAGS_BSP:0; + mpep->proc_flags |= MPEP_FLAGS_EN; + mpep->proc_signature = MPEP_SIGNATURE; + mpep->feature_flags = MPEP_FEATURES; + mpep++; + } + +} + +static void +mp_build_bus_entries(struct mpe_bus *mpeb) +{ + memset(mpeb, 0, sizeof(*mpeb)); + mpeb->entry_type = MP_ENTRY_BUS; + mpeb->busid = MPE_BUSID_ISA; + memcpy(mpeb->busname, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); + mpeb++; + + memset(mpeb, 0, sizeof(*mpeb)); + mpeb->entry_type = MP_ENTRY_BUS; + mpeb->busid = MPE_BUSID_PCI; + memcpy(mpeb->busname, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); + +} + +static void +mp_build_ioapic_entries(struct mpe_ioapic *mpei) +{ + memset(mpei, 0, sizeof(*mpei)); + mpei->entry_type = MP_ENTRY_IOAPIC; + mpei->ioapic_id = MPE_IOAPIC_ID; + mpei->ioapic_version = IOAPIC_VERSION; + mpei->ioapic_flags = MPE_IOAPIC_FLAG_EN; + mpei->ioapic_paddr = IOAPIC_PADDR; +} + +static void +mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins) +{ + int pin; + + /* + * The following config is taken from kernel mptable.c + * mptable_parse_default_config_ints(...), for now + * just use the default config, tweek later if needed. + */ + + + /* Run through all 16 pins. */ + for (pin = 0; pin < num_pins; pin++) { + memset(mpeii, 0, sizeof(*mpeii)); + mpeii->entry_type = MP_ENTRY_IOINT; + mpeii->src_bus_id = MPE_BUSID_ISA; + mpeii->dst_apic_id = MPE_IOAPIC_ID; + + /* + * All default configs route IRQs from bus 0 to the first 16 pins + * of the first I/O APIC with an APIC ID of 2. + */ + mpeii->dst_apic_intin = pin; + switch (pin) { + case 0: + /* Pin 0 is an ExtINT pin. */ + mpeii->intr_type = MPEII_INTR_EXTINT; + break; + case 2: + /* IRQ 0 is routed to pin 2. */ + mpeii->intr_type = MPEII_INTR_INT; + mpeii->src_bus_irq = 0; + break; + case 5: + case 10: + case 11: + /* + * PCI Irqs set to level triggered. + */ + mpeii->intr_flags = MPEII_FLAGS_TRIGMODE_LEVEL; + mpeii->src_bus_id = MPE_BUSID_PCI; + default: + /* All other pins are identity mapped. 
*/ + mpeii->intr_type = MPEII_INTR_INT; + mpeii->src_bus_irq = pin; + break; + } + mpeii++; + } + +} + +#define COPYSTR(dest, src, bytes) \ + memcpy(dest, src, bytes); \ + str[bytes] = 0; + + +static void +mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch) +{ + static char str[16]; + int i; + char *cur; + + union mpe { + struct mpe_proc *proc; + struct mpe_bus *bus; + struct mpe_ioapic *ioapic; + struct mpe_ioint *ioint; + struct mpe_lint *lnit; + char *p; + }; + + union mpe mpe; + + printf(" MP Floating Pointer :\n"); + COPYSTR(str, mpfp->signature, 4); + printf(" signature: %s\n", str); + printf(" mpch paddr: %x\n", mpfp->mptable_paddr); + printf(" length: %x\n", mpfp->length); + printf(" specrec: %x\n", mpfp->specrev); + printf(" checksum: %x\n", mpfp->checksum); + printf(" feature1: %x\n", mpfp->feature1); + printf(" feature2: %x\n", mpfp->feature2); + printf(" feature3: %x\n", mpfp->feature3); + printf(" feature4: %x\n", mpfp->feature4); + + printf(" MP Configuration Header :\n"); + COPYSTR(str, mpch->signature, 4); + printf(" signature: %s\n", str); + printf(" length: %x\n", mpch->length); + printf(" specrec: %x\n", mpch->specrev); + printf(" checksum: %x\n", mpch->checksum); + COPYSTR(str, mpch->oemid, MPCH_OEMID_LEN); + printf(" oemid: %s\n", str); + COPYSTR(str, mpch->prodid, MPCH_PRODID_LEN); + printf(" prodid: %s\n", str); + printf(" oem_ptr: %x\n", mpch->oem_ptr); + printf(" oem_sz: %x\n", mpch->oem_sz); + printf(" nr_entries: %x\n", mpch->nr_entries); + printf(" apic paddr: %x\n", mpch->lapic_paddr); + printf(" ext_length: %x\n", mpch->ext_length); + printf(" ext_checksum: %x\n", mpch->ext_checksum); + + cur = (char *)mpch + sizeof(*mpch); + for (i = 0; i < mpch->nr_entries; i++) { + mpe.p = cur; + switch(*mpe.p) { + case MP_ENTRY_PROC: + printf(" MP Processor Entry :\n"); + printf(" lapic_id: %x\n", mpe.proc->lapic_id); + printf(" lapic_version: %x\n", mpe.proc->lapic_version); + printf(" proc_flags: %x\n", mpe.proc->proc_flags); + printf(" proc_signature: %x\n", mpe.proc->proc_signature); + printf(" feature_flags: %x\n", mpe.proc->feature_flags); + cur += sizeof(struct mpe_proc); + break; + case MP_ENTRY_BUS: + printf(" MP Bus Entry :\n"); + printf(" busid: %x\n", mpe.bus->busid); + COPYSTR(str, mpe.bus->busname, MPE_BUSNAME_LEN); + printf(" busname: %s\n", str); + cur += sizeof(struct mpe_bus); + break; + case MP_ENTRY_IOAPIC: + printf(" MP IOAPIC Entry :\n"); + printf(" ioapi_id: %x\n", mpe.ioapic->ioapic_id); + printf(" ioapi_version: %x\n", mpe.ioapic->ioapic_version); + printf(" ioapi_flags: %x\n", mpe.ioapic->ioapic_flags); + printf(" ioapi_paddr: %x\n", mpe.ioapic->ioapic_paddr); + cur += sizeof(struct mpe_ioapic); + break; + case MP_ENTRY_IOINT: + printf(" MP IO Interrupt Entry :\n"); + printf(" intr_type: %x\n", mpe.ioint->intr_type); + printf(" intr_flags: %x\n", mpe.ioint->intr_flags); + printf(" src_bus_id: %x\n", mpe.ioint->src_bus_id); + printf(" src_bus_irq: %x\n", mpe.ioint->src_bus_irq); + printf(" dst_apic_id: %x\n", mpe.ioint->dst_apic_id); + printf(" dst_apic_intin: %x\n", mpe.ioint->dst_apic_intin); + cur += sizeof(struct mpe_ioint); + break; + case MP_ENTRY_LINT: + printf(" MP Local Interrupt Entry :\n"); + cur += sizeof(struct mpe_lint); + break; + } + + } +} + +int +vm_build_mptable(struct vmctx *ctx, vm_paddr_t gpa, int len, int ncpu, + void *oemp, int oemsz) +{ + struct mp_config_hdr *mpch; + char *mapaddr; + char *startaddr; + int error; + + mapaddr = vm_map_memory(ctx, gpa, len); + if (mapaddr == MAP_FAILED) { + 
printf("%s\n", strerror(errno)); + goto err; + } + startaddr = mapaddr; + + mp_build_mpfp((struct mp_floating_pointer*) mapaddr, gpa); + mapaddr += sizeof(struct mp_floating_pointer); + + mpch = (struct mp_config_hdr*)mapaddr; + mp_build_mpch(mpch); + mapaddr += sizeof(struct mp_config_hdr); + + mp_build_proc_entries((struct mpe_proc*) mapaddr, ncpu); + mapaddr += (sizeof(struct mpe_proc)*ncpu); + mpch->nr_entries += ncpu; + + mp_build_bus_entries((struct mpe_bus*)mapaddr); + mapaddr += (sizeof(struct mpe_bus)*MPE_NUM_BUSES); + mpch->nr_entries += MPE_NUM_BUSES; +#if 0 + mp_build_ioapic_entries((struct mpe_ioapic*)mapaddr); + mapaddr += sizeof(struct mpe_ioapic); + mpch->nr_entries++; + + mp_build_ioint_entries((struct mpe_ioint*)mapaddr, MPEII_MAX_IRQ); + mapaddr += sizeof(struct mpe_ioint)*MPEII_MAX_IRQ; + mpch->nr_entries += MPEII_MAX_IRQ; + +#endif + if (oemp) { + mpch->oem_ptr = mapaddr - startaddr + gpa; + mpch->oem_sz = oemsz; + memcpy(mapaddr, oemp, oemsz); + } + mpch->length = (mapaddr) - ((char*) mpch); + mpch->checksum = mp_compute_checksum(mpch, sizeof(*mpch)); + + + // mptable_dump((struct mp_floating_pointer*)startaddr, mpch); +err: + return (error); +} diff --git a/lib/libvmmapi/mptable.h b/lib/libvmmapi/mptable.h new file mode 100644 index 0000000..cad8834 --- /dev/null +++ b/lib/libvmmapi/mptable.h @@ -0,0 +1,171 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MPTABLE_h_ +#define _MPTABLE_h_ + +#define MP_SPECREV (4) // MP spec revision 1.1 + +/* + * MP Floating Pointer Structure + */ +#define MPFP_SIGNATURE "_MP_" +#define MPFP_SIGNATURE_LEN (4) +#define MPFP_FEATURE2 (0x80) // IMCR is present +struct mp_floating_pointer { + uint8_t signature[MPFP_SIGNATURE_LEN]; + uint32_t mptable_paddr; + uint8_t length; + uint8_t specrev; + uint8_t checksum; + uint8_t feature1; + uint8_t feature2; + uint8_t feature3; + uint8_t feature4; + uint8_t feature5; +}; + + +/* + * MP Configuration Table Header + */ +#define MPCH_SIGNATURE "PCMP" +#define MPCH_SIGNATURE_LEN (4) + +#define MPCH_OEMID "NETAPP " +#define MPCH_OEMID_LEN (8) +#define MPCH_PRODID "vFiler " +#define MPCH_PRODID_LEN (12) + +struct mp_config_hdr { + uint8_t signature[MPCH_SIGNATURE_LEN]; + uint16_t length; + uint8_t specrev; + uint8_t checksum; + uint8_t oemid[MPCH_OEMID_LEN]; + uint8_t prodid[MPCH_PRODID_LEN]; + uint32_t oem_ptr; + uint16_t oem_sz; + uint16_t nr_entries; + uint32_t lapic_paddr; + uint16_t ext_length; + uint8_t ext_checksum; + uint8_t reserved; +}; + +#define MP_ENTRY_PROC (0) +#define MP_ENTRY_BUS (1) +#define MP_ENTRY_IOAPIC (2) +#define MP_ENTRY_IOINT (3) +#define MP_ENTRY_LINT (4) + +/* + * MP Processor Entry + */ + +#define MPEP_FLAGS_EN (0x1) +#define MPEP_FLAGS_BSP (0x2) + +#define MPEP_SIG_FAMILY (6) +#define MPEP_SIG_MODEL (26) +#define MPEP_SIG_STEPPING (5) +#define MPEP_SIGNATURE ((MPEP_SIG_FAMILY << 8) | (MPEP_SIG_MODEL << 4) \ + | (MPEP_SIG_STEPPING)) + +#define MPEP_FEATURES (0xBFEBFBFF) // Value from Intel i7 CPUID + +struct mpe_proc { + uint8_t entry_type; + uint8_t lapic_id; + uint8_t lapic_version; + uint8_t proc_flags; + uint32_t proc_signature; + uint32_t feature_flags; + uint8_t reserved[8]; +}; + +/* + * MP Bus Entry + */ + +#define MPE_NUM_BUSES (2) +#define MPE_BUSNAME_LEN (6) +#define MPE_BUSID_ISA (0) +#define MPE_BUSID_PCI (1) +#define MPE_BUSNAME_ISA "ISA " +#define MPE_BUSNAME_PCI "PCI " +struct mpe_bus { + uint8_t entry_type; + uint8_t busid; + uint8_t busname[MPE_BUSNAME_LEN]; +}; + +/* + * MP IO APIC Entry + */ +#define MPE_IOAPIC_ID (2) +#define MPE_IOAPIC_FLAG_EN (1) +struct mpe_ioapic { + uint8_t entry_type; + uint8_t ioapic_id; + uint8_t ioapic_version; + uint8_t ioapic_flags; + uint32_t ioapic_paddr; + +}; + +/* + * MP IO Interrupt Assignment Entry + */ +#define MPEII_INTR_INT (0) +#define MPEII_INTR_NMI (1) +#define MPEII_INTR_SMI (2) +#define MPEII_INTR_EXTINT (3) +#define MPEII_PCI_IRQ_MASK (0x0c20U) /* IRQ 5,10,11 are PCI connected */ +#define MPEII_MAX_IRQ (16) +#define MPEII_FLAGS_TRIGMODE_LEVEL (0x3) +struct mpe_ioint { + uint8_t entry_type; + uint8_t intr_type; + uint16_t intr_flags; + uint8_t src_bus_id; + uint8_t src_bus_irq; + uint8_t dst_apic_id; + uint8_t dst_apic_intin; +}; + +/* + * MP Local Interrupt Assignment Entry + */ +struct mpe_lint { + uint8_t entry_type; +}; + +int vm_build_mptable(struct vmctx *ctxt, vm_paddr_t gpa, int len, + int ncpu, void *oemp, int oemsz); +#endif /* _MPTABLE_h_ */ diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c new file mode 100644 index 0000000..7f95fea --- /dev/null +++ b/lib/libvmmapi/vmmapi.c @@ -0,0 +1,647 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "vmmapi.h" +#include "mptable.h" + +#ifndef CR4_VMXE +#define CR4_VMXE (1UL << 13) +#endif + +#define BIOS_ROM_BASE (0xf0000) +#define BIOS_ROM_SIZE (0x10000) + +struct vmctx { + int fd; + char *name; +}; + +#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) +#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) + +static int +vm_device_open(const char *name) +{ + int fd, len; + char *vmfile; + + len = strlen("/dev/vmm/") + strlen(name) + 1; + vmfile = malloc(len); + assert(vmfile != NULL); + snprintf(vmfile, len, "/dev/vmm/%s", name); + + /* Open the device file */ + fd = open(vmfile, O_RDWR, 0); + + free(vmfile); + return (fd); +} + +int +vm_create(const char *name) +{ + + return (CREATE((char *)name)); +} + +struct vmctx * +vm_open(const char *name) +{ + struct vmctx *vm; + + vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); + assert(vm != NULL); + + vm->fd = -1; + vm->name = (char *)(vm + 1); + strcpy(vm->name, name); + + if ((vm->fd = vm_device_open(vm->name)) < 0) + goto err; + + return (vm); +err: + vm_destroy(vm); + return (NULL); +} + +void +vm_destroy(struct vmctx *vm) +{ + assert(vm != NULL); + + DESTROY(vm->name); + if (vm->fd >= 0) + close(vm->fd); + free(vm); +} + +int +vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, + vm_paddr_t *ret_hpa, size_t *ret_len) +{ + int error; + struct vm_memory_segment seg; + + bzero(&seg, sizeof(seg)); + seg.gpa = gpa; + error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); + *ret_hpa = seg.hpa; + *ret_len = seg.len; + return (error); +} + +int +vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr) +{ + int error; + struct vm_memory_segment seg; + + /* + * Create and optionally map 'len' bytes of memory at guest + * physical address 'gpa' + */ + bzero(&seg, sizeof(seg)); + seg.gpa = gpa; + seg.len = len; + error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); + if (error == 0 && mapaddr != NULL) { + *mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, gpa); + } + return (error); +} + +char * +vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len) +{ + + /* Map 'len' bytes of memory at guest physical address 'gpa' */ + return ((char *)mmap(NULL, 
len, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, gpa)); +} + +int +vm_set_desc(struct vmctx *ctx, int vcpu, int reg, + uint64_t base, uint32_t limit, uint32_t access) +{ + int error; + struct vm_seg_desc vmsegdesc; + + bzero(&vmsegdesc, sizeof(vmsegdesc)); + vmsegdesc.cpuid = vcpu; + vmsegdesc.regnum = reg; + vmsegdesc.desc.base = base; + vmsegdesc.desc.limit = limit; + vmsegdesc.desc.access = access; + + error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); + return (error); +} + +int +vm_get_desc(struct vmctx *ctx, int vcpu, int reg, + uint64_t *base, uint32_t *limit, uint32_t *access) +{ + int error; + struct vm_seg_desc vmsegdesc; + + bzero(&vmsegdesc, sizeof(vmsegdesc)); + vmsegdesc.cpuid = vcpu; + vmsegdesc.regnum = reg; + + error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); + if (error == 0) { + *base = vmsegdesc.desc.base; + *limit = vmsegdesc.desc.limit; + *access = vmsegdesc.desc.access; + } + return (error); +} + +int +vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) +{ + int error; + struct vm_register vmreg; + + bzero(&vmreg, sizeof(vmreg)); + vmreg.cpuid = vcpu; + vmreg.regnum = reg; + vmreg.regval = val; + + error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); + return (error); +} + +int +vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) +{ + int error; + struct vm_register vmreg; + + bzero(&vmreg, sizeof(vmreg)); + vmreg.cpuid = vcpu; + vmreg.regnum = reg; + + error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); + *ret_val = vmreg.regval; + return (error); +} + +int +vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid) +{ + int error; + struct vm_pin vmpin; + + bzero(&vmpin, sizeof(vmpin)); + vmpin.vm_cpuid = vcpu; + + error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin); + *host_cpuid = vmpin.host_cpuid; + return (error); +} + +int +vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid) +{ + int error; + struct vm_pin vmpin; + + bzero(&vmpin, sizeof(vmpin)); + vmpin.vm_cpuid = vcpu; + vmpin.host_cpuid = host_cpuid; + + error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin); + return (error); +} + +int +vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) +{ + int error; + struct vm_run vmrun; + + bzero(&vmrun, sizeof(vmrun)); + vmrun.cpuid = vcpu; + vmrun.rip = rip; + + error = ioctl(ctx->fd, VM_RUN, &vmrun); + bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); + return (error); +} + +static int +vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, + int vector, int error_code, int error_code_valid) +{ + struct vm_event ev; + + bzero(&ev, sizeof(ev)); + ev.cpuid = vcpu; + ev.type = type; + ev.vector = vector; + ev.error_code = error_code; + ev.error_code_valid = error_code_valid; + + return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); +} + +int +vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, + int vector) +{ + + return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); +} + +int +vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, + int vector, int error_code) +{ + + return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); +} + +int +vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz) +{ + + return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu, + oemtbl, oemtblsz)); +} + +int +vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) +{ + struct vm_lapic_irq vmirq; + + bzero(&vmirq, sizeof(vmirq)); + vmirq.cpuid = vcpu; + vmirq.vector = vector; + + return (ioctl(ctx->fd, VM_LAPIC_IRQ, 
&vmirq)); +} + +int +vm_inject_nmi(struct vmctx *ctx, int vcpu) +{ + struct vm_nmi vmnmi; + + bzero(&vmnmi, sizeof(vmnmi)); + vmnmi.cpuid = vcpu; + + return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); +} + +int +vm_capability_name2type(const char *capname) +{ + int i; + + static struct { + const char *name; + int type; + } capstrmap[] = { + { "hlt_exit", VM_CAP_HALT_EXIT }, + { "mtrap_exit", VM_CAP_MTRAP_EXIT }, + { "pause_exit", VM_CAP_PAUSE_EXIT }, + { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, + { 0 } + }; + + for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { + if (strcmp(capstrmap[i].name, capname) == 0) + return (capstrmap[i].type); + } + + return (-1); +} + +int +vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int *retval) +{ + int error; + struct vm_capability vmcap; + + bzero(&vmcap, sizeof(vmcap)); + vmcap.cpuid = vcpu; + vmcap.captype = cap; + + error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); + *retval = vmcap.capval; + return (error); +} + +int +vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) +{ + struct vm_capability vmcap; + + bzero(&vmcap, sizeof(vmcap)); + vmcap.cpuid = vcpu; + vmcap.captype = cap; + vmcap.capval = val; + + return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); +} + +int +vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) +{ + struct vm_pptdev pptdev; + + bzero(&pptdev, sizeof(pptdev)); + pptdev.bus = bus; + pptdev.slot = slot; + pptdev.func = func; + + return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); +} + +int +vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) +{ + struct vm_pptdev pptdev; + + bzero(&pptdev, sizeof(pptdev)); + pptdev.bus = bus; + pptdev.slot = slot; + pptdev.func = func; + + return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); +} + +int +vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, + vm_paddr_t gpa, size_t len, vm_paddr_t hpa) +{ + struct vm_pptdev_mmio pptmmio; + + bzero(&pptmmio, sizeof(pptmmio)); + pptmmio.bus = bus; + pptmmio.slot = slot; + pptmmio.func = func; + pptmmio.gpa = gpa; + pptmmio.len = len; + pptmmio.hpa = hpa; + + return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); +} + +int +vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, + int destcpu, int vector, int numvec) +{ + struct vm_pptdev_msi pptmsi; + + bzero(&pptmsi, sizeof(pptmsi)); + pptmsi.vcpu = vcpu; + pptmsi.bus = bus; + pptmsi.slot = slot; + pptmsi.func = func; + pptmsi.destcpu = destcpu; + pptmsi.vector = vector; + pptmsi.numvec = numvec; + + return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); +} + +uint64_t * +vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, + int *ret_entries) +{ + int error; + + static struct vm_stats vmstats; + + vmstats.cpuid = vcpu; + + error = ioctl(ctx->fd, VM_STATS, &vmstats); + if (error == 0) { + if (ret_entries) + *ret_entries = vmstats.num_entries; + if (ret_tv) + *ret_tv = vmstats.tv; + return (vmstats.statbuf); + } else + return (NULL); +} + +const char * +vm_get_stat_desc(struct vmctx *ctx, int index) +{ + int error; + + static struct vm_stat_desc statdesc; + + statdesc.index = index; + if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) + return (statdesc.desc); + else + return (NULL); +} + +/* + * From Intel Vol 3a: + * Table 9-1. 
IA-32 Processor States Following Power-up, Reset or INIT + */ +int +vcpu_reset(struct vmctx *vmctx, int vcpu) +{ + int error; + uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; + uint32_t desc_access, desc_limit; + uint16_t sel; + + zero = 0; + + rflags = 0x2; + error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); + if (error) + goto done; + + rip = 0xfff0; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) + goto done; + + cr0 = CR0_NE; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) + goto done; + + cr4 = CR4_VMXE; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) + goto done; + + /* + * CS: present, r/w, accessed, 16-bit, byte granularity, usable + */ + desc_base = 0xffff0000; + desc_limit = 0xffff; + desc_access = 0x0093; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + sel = 0xf000; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) + goto done; + + /* + * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity + */ + desc_base = 0; + desc_limit = 0xffff; + desc_access = 0x0093; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + sel = 0; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) + goto done; + + /* General purpose registers */ + rdx = 0xf00; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) + goto done; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) + goto done; + + /* GDTR, IDTR */ + desc_base = 0; + desc_limit = 0xffff; + desc_access = 0; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, + desc_base, desc_limit, desc_access); + if (error != 0) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, + desc_base, desc_limit, desc_access); + if (error != 0) + goto done; + + /* TR */ + desc_base = 0; + desc_limit = 0xffff; + desc_access = 0x0000008b; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, 
desc_access); + if (error) + goto done; + + sel = 0; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) + goto done; + + /* LDTR */ + desc_base = 0; + desc_limit = 0xffff; + desc_access = 0x00000082; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, + desc_limit, desc_access); + if (error) + goto done; + + sel = 0; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) + goto done; + + /* XXX cr2, debug registers */ + + error = 0; +done: + return (error); +} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h new file mode 100644 index 0000000..38533a8 --- /dev/null +++ b/lib/libvmmapi/vmmapi.h @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMMAPI_H_ +#define _VMMAPI_H_ + +struct vmctx; + +int vm_create(const char *name); +struct vmctx *vm_open(const char *name); +void vm_destroy(struct vmctx *ctx); +int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, + vm_paddr_t *ret_hpa, size_t *ret_len); +/* + * Create a memory segment of 'len' bytes in the guest physical address space + * at offset 'gpa'. + * + * If 'mapaddr' is not NULL then this region is mmap'ed into the address + * space of the calling process. If there is an mmap error then *mapaddr + * will be set to MAP_FAILED. 
+ */ + +int vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, + char **mapaddr); +char * vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len); +int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, + uint64_t base, uint32_t limit, uint32_t access); +int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, + uint64_t *base, uint32_t *limit, uint32_t *access); +int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); +int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); +int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid); +int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid); +int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, + struct vm_exit *ret_vmexit); +int vm_build_tables(struct vmctx *ctxt, int ncpus, void *oemtbl, + int oemtblsz); +int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, + int vector); +int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, + int vector, int error_code); +int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector); +int vm_inject_nmi(struct vmctx *ctx, int vcpu); +int vm_capability_name2type(const char *capname); +int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int *retval); +int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int val); +int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func); +int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func); +int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, + vm_paddr_t gpa, size_t len, vm_paddr_t hpa); +int vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, + int dest, int vector, int numvec); + +/* + * Return a pointer to the statistics buffer. Note that this is not MT-safe. + */ +uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, + int *ret_entries); +const char *vm_get_stat_desc(struct vmctx *ctx, int index); + +/* Reset vcpu register state */ +int vcpu_reset(struct vmctx *ctx, int vcpu); + +/* + * FreeBSD specific APIs + */ +int vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu, + uint64_t rip, uint64_t cr3, uint64_t gdtbase, + uint64_t rsp); +void vm_setup_freebsd_gdt(uint64_t *gdtr); +#endif /* _VMMAPI_H_ */ diff --git a/lib/libvmmapi/vmmapi_freebsd.c b/lib/libvmmapi/vmmapi_freebsd.c new file mode 100644 index 0000000..c4ad989 --- /dev/null +++ b/lib/libvmmapi/vmmapi_freebsd.c @@ -0,0 +1,187 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include + +#include "vmmapi.h" + +#ifndef CR4_VMXE +#define CR4_VMXE (1UL << 13) +#endif + +#define DESC_UNUSABLE 0x00010000 + +#define GUEST_NULL_SEL 0 +#define GUEST_CODE_SEL 1 +#define GUEST_DATA_SEL 2 +#define GUEST_GDTR_LIMIT (3 * 8 - 1) + +void +vm_setup_freebsd_gdt(uint64_t *gdtr) +{ + gdtr[GUEST_NULL_SEL] = 0; + gdtr[GUEST_CODE_SEL] = 0x0020980000000000; + gdtr[GUEST_DATA_SEL] = 0x0000900000000000; +} + +/* + * Setup the 'vcpu' register set such that it will begin execution at + * 'rip' in long mode. + */ +int +vm_setup_freebsd_registers(struct vmctx *vmctx, int vcpu, + uint64_t rip, uint64_t cr3, uint64_t gdtbase, + uint64_t rsp) +{ + int error; + uint64_t cr0, cr4, efer, rflags, desc_base; + uint32_t desc_access, desc_limit; + uint16_t gsel; + + cr0 = CR0_PE | CR0_PG | CR0_NE; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) + goto done; + + cr4 = CR4_PAE | CR4_VMXE; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) + goto done; + + efer = EFER_LME | EFER_LMA; + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_EFER, efer))) + goto done; + + rflags = 0x2; + error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); + if (error) + goto done; + + desc_base = 0; + desc_limit = 0; + desc_access = 0x0000209B; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + desc_access = 0x00000093; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, + desc_base, desc_limit, desc_access); + if (error) + goto done; + + /* + * XXX TR is pointing to null selector even though we set the + * TSS segment to be usable with a base address and limit of 0. 
+ */ + desc_access = 0x0000008b; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); + if (error) + goto done; + + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, 0, 0, + DESC_UNUSABLE); + if (error) + goto done; + + gsel = GSEL(GUEST_CODE_SEL, SEL_KPL); + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, gsel)) != 0) + goto done; + + gsel = GSEL(GUEST_DATA_SEL, SEL_KPL); + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, gsel)) != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, gsel)) != 0) + goto done; + + /* XXX TR is pointing to the null selector */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, 0)) != 0) + goto done; + + /* LDTR is pointing to the null selector */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) + goto done; + + /* entry point */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) + goto done; + + /* page table base */ + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, cr3)) != 0) + goto done; + + desc_base = gdtbase; + desc_limit = GUEST_GDTR_LIMIT; + error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, + desc_base, desc_limit, 0); + if (error != 0) + goto done; + + if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, rsp)) != 0) + goto done; + + error = 0; +done: + return (error); +} -- cgit v1.1 From 9f0c999f8126597eb572b80056df88335dbd0070 Mon Sep 17 00:00:00 2001 From: grehan Date: Sat, 28 Apr 2012 16:28:00 +0000 Subject: MSI-x interrupt support for PCI pass-thru devices. Includes instruction emulation for memory r/w access. This opens the door for io-apic, local apic, hpet timer, and legacy device emulation. 
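(A hedged sketch with placeholder values, not from the commit: the vm_setup_msix() wrapper added below hands a guest-programmed MSI-X table entry straight through to vmm.ko; on x86 the message address falls in the 0xfee00000 region and the low byte of the data word carries the vector.)

	#include <stdint.h>

	/* Sketch: forward one guest-programmed MSI-X table entry. */
	static int
	push_msix_entry(struct vmctx *ctx, int bus, int slot, int func, int idx)
	{
		uint64_t addr = 0xfee00000;	/* placeholder message address */
		uint32_t msg = 0x41;		/* placeholder data; vector 0x41 */
		uint32_t vector_control = 0;	/* entry unmasked */

		return (vm_setup_msix(ctx, 0 /* vcpu */, bus, slot, func, idx,
		    msg, vector_control, addr));
	}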
Submitted by: ryan dot berryhill at sandvine dot com Reviewed by: grehan Obtained from: Sandvine --- lib/libvmmapi/vmmapi.c | 19 +++++++++++++++++++ lib/libvmmapi/vmmapi.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 17f2063..a33a94e 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -454,6 +454,25 @@ vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); } +int +vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, + int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) +{ + struct vm_pptdev_msix pptmsix; + + bzero(&pptmsix, sizeof(pptmsix)); + pptmsix.vcpu = vcpu; + pptmsix.bus = bus; + pptmsix.slot = slot; + pptmsix.func = func; + pptmsix.idx = idx; + pptmsix.msg = msg; + pptmsix.addr = addr; + pptmsix.vector_control = vector_control; + + return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); +} + uint64_t * vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 38533a8..7b66421 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -77,6 +77,8 @@ int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int dest, int vector, int numvec); +int vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, + int idx, uint32_t msg, uint32_t vector_control, uint64_t addr); /* * Return a pointer to the statistics buffer. Note that this is not MT-safe. -- cgit v1.1 From ea90afcf927db1d8e7cf9775414b82ca13dff8b3 Mon Sep 17 00:00:00 2001 From: dougb Date: Mon, 4 Jun 2012 22:06:29 +0000 Subject: Vendor import of BIND 9.8.3-P1 --- lib/dns/rdata.c | 8 ++++---- lib/dns/rdataslab.c | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/dns/rdata.c b/lib/dns/rdata.c index 8773145..f6807d1 100644 --- a/lib/dns/rdata.c +++ b/lib/dns/rdata.c @@ -329,8 +329,8 @@ dns_rdata_compare(const dns_rdata_t *rdata1, const dns_rdata_t *rdata2) { REQUIRE(rdata1 != NULL); REQUIRE(rdata2 != NULL); - REQUIRE(rdata1->data != NULL); - REQUIRE(rdata2->data != NULL); + REQUIRE(rdata1->length == 0 || rdata1->data != NULL); + REQUIRE(rdata2->length == 0 || rdata2->data != NULL); REQUIRE(DNS_RDATA_VALIDFLAGS(rdata1)); REQUIRE(DNS_RDATA_VALIDFLAGS(rdata2)); @@ -360,8 +360,8 @@ dns_rdata_casecompare(const dns_rdata_t *rdata1, const dns_rdata_t *rdata2) { REQUIRE(rdata1 != NULL); REQUIRE(rdata2 != NULL); - REQUIRE(rdata1->data != NULL); - REQUIRE(rdata2->data != NULL); + REQUIRE(rdata1->length == 0 || rdata1->data != NULL); + REQUIRE(rdata2->length == 0 || rdata2->data != NULL); REQUIRE(DNS_RDATA_VALIDFLAGS(rdata1)); REQUIRE(DNS_RDATA_VALIDFLAGS(rdata2)); diff --git a/lib/dns/rdataslab.c b/lib/dns/rdataslab.c index 150d9b8..6fbfdd7 100644 --- a/lib/dns/rdataslab.c +++ b/lib/dns/rdataslab.c @@ -126,6 +126,11 @@ isc_result_t dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, isc_region_t *region, unsigned int reservelen) { + /* + * Use &removed as a sentinal pointer for duplicate + * rdata as rdata.data == NULL is valid. 
+ */ + static unsigned char removed; struct xrdata *x; unsigned char *rawbuf; #if DNS_RDATASET_FIXED @@ -169,6 +174,7 @@ dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, INSIST(result == ISC_R_SUCCESS); dns_rdata_init(&x[i].rdata); dns_rdataset_current(rdataset, &x[i].rdata); + INSIST(x[i].rdata.data != &removed); #if DNS_RDATASET_FIXED x[i].order = i; #endif @@ -201,8 +207,7 @@ dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, */ for (i = 1; i < nalloc; i++) { if (compare_rdata(&x[i-1].rdata, &x[i].rdata) == 0) { - x[i-1].rdata.data = NULL; - x[i-1].rdata.length = 0; + x[i-1].rdata.data = &removed; #if DNS_RDATASET_FIXED /* * Preserve the least order so A, B, A -> A, B @@ -292,7 +297,7 @@ dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, #endif for (i = 0; i < nalloc; i++) { - if (x[i].rdata.data == NULL) + if (x[i].rdata.data == &removed) continue; #if DNS_RDATASET_FIXED offsettable[x[i].order] = rawbuf - offsetbase; -- cgit v1.1 From bf4ca397d7b9fc67cd4254cd07e7ce9b42b22250 Mon Sep 17 00:00:00 2001 From: dougb Date: Tue, 24 Jul 2012 18:48:17 +0000 Subject: Vendor import of BIND 9.8.3-P2 --- lib/dns/resolver.c | 5 +++-- lib/dns/zone.c | 26 ++++++++++++++++++++------ 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 1ae2f16..4c8b144 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -8448,6 +8448,7 @@ dns_resolver_addbadcache(dns_resolver_t *resolver, dns_name_t *name, goto cleanup; bad->type = type; bad->hashval = hashval; + bad->expire = *expire; isc_buffer_init(&buffer, bad + 1, name->length); dns_name_init(&bad->name, NULL); dns_name_copy(name, &bad->name, &buffer); @@ -8459,8 +8460,8 @@ dns_resolver_addbadcache(dns_resolver_t *resolver, dns_name_t *name, if (resolver->badcount < resolver->badhash * 2 && resolver->badhash > DNS_BADCACHE_SIZE) resizehash(resolver, &now, ISC_FALSE); - } - bad->expire = *expire; + } else + bad->expire = *expire; cleanup: UNLOCK(&resolver->lock); } diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 22870dc..efe31eb 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -8027,13 +8027,14 @@ zone_maintenance(dns_zone_t *zone) { case dns_zone_master: case dns_zone_slave: case dns_zone_key: + case dns_zone_stub: LOCK_ZONE(zone); if (zone->masterfile != NULL && isc_time_compare(&now, &zone->dumptime) >= 0 && DNS_ZONE_FLAG(zone, DNS_ZONEFLG_LOADED) && DNS_ZONE_FLAG(zone, DNS_ZONEFLG_NEEDDUMP)) { dumping = was_dumping(zone); - } else + } else dumping = ISC_TRUE; UNLOCK_ZONE(zone); if (!dumping) { @@ -8386,7 +8387,7 @@ zone_dump(dns_zone_t *zone, isc_boolean_t compact) { goto fail; } - if (compact) { + if (compact && zone->type != dns_zone_stub) { dns_zone_t *dummy = NULL; LOCK_ZONE(zone); zone_iattach(zone, &dummy); @@ -9242,7 +9243,7 @@ stub_callback(isc_task_t *task, isc_event_t *event) { dns_zone_t *zone = NULL; char master[ISC_SOCKADDR_FORMATSIZE]; char source[ISC_SOCKADDR_FORMATSIZE]; - isc_uint32_t nscnt, cnamecnt; + isc_uint32_t nscnt, cnamecnt, refresh, retry, expire; isc_result_t result; isc_time_t now; isc_boolean_t exiting = ISC_FALSE; @@ -9390,19 +9391,32 @@ stub_callback(isc_task_t *task, isc_event_t *event) { ZONEDB_LOCK(&zone->dblock, isc_rwlocktype_write); if (zone->db == NULL) zone_attachdb(zone, stub->db); + result = zone_get_from_db(zone, zone->db, NULL, NULL, NULL, &refresh, + &retry, &expire, NULL, NULL); + if (result == ISC_R_SUCCESS) { + zone->refresh = RANGE(refresh, zone->minrefresh, + 
zone->maxrefresh); + zone->retry = RANGE(retry, zone->minretry, zone->maxretry); + zone->expire = RANGE(expire, zone->refresh + zone->retry, + DNS_MAX_EXPIRE); + DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_HAVETIMERS); + } ZONEDB_UNLOCK(&zone->dblock, isc_rwlocktype_write); dns_db_detach(&stub->db); - if (zone->masterfile != NULL) - zone_needdump(zone, 0); - dns_message_destroy(&msg); isc_event_free(&event); dns_request_destroy(&zone->request); + DNS_ZONE_CLRFLAG(zone, DNS_ZONEFLG_REFRESH); + DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_LOADED); DNS_ZONE_JITTER_ADD(&now, zone->refresh, &zone->refreshtime); isc_interval_set(&i, zone->expire, 0); DNS_ZONE_TIME_ADD(&now, zone->expire, &zone->expiretime); + + if (zone->masterfile != NULL) + zone_needdump(zone, 0); + zone_settimer(zone, &now); goto free_stub; -- cgit v1.1 From 19e3bb399c65527fcc13a4ae345b71ee6c5fee84 Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 02:14:27 +0000 Subject: There is no need to explicitly specify the CR4_VMXE bit when writing to guest CR4. This bit is specific to the Intel VTX and removing it makes the library more portable to AMD/SVM. In the Intel VTX implementation, the hypervisor will ensure that this bit is always set. See vmx_fix_cr4() for details. Suggested by: grehan --- lib/libvmmapi/vmmapi.c | 6 +----- lib/libvmmapi/vmmapi_freebsd.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index a33a94e..077a46b 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -49,10 +49,6 @@ __FBSDID("$FreeBSD$"); #include "vmmapi.h" #include "mptable.h" -#ifndef CR4_VMXE -#define CR4_VMXE (1UL << 13) -#endif - #define BIOS_ROM_BASE (0xf0000) #define BIOS_ROM_SIZE (0x10000) @@ -536,7 +532,7 @@ vcpu_reset(struct vmctx *vmctx, int vcpu) if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) goto done; - cr4 = CR4_VMXE; + cr4 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; diff --git a/lib/libvmmapi/vmmapi_freebsd.c b/lib/libvmmapi/vmmapi_freebsd.c index c4ad989..9bd2988 100644 --- a/lib/libvmmapi/vmmapi_freebsd.c +++ b/lib/libvmmapi/vmmapi_freebsd.c @@ -37,10 +37,6 @@ __FBSDID("$FreeBSD$"); #include "vmmapi.h" -#ifndef CR4_VMXE -#define CR4_VMXE (1UL << 13) -#endif - #define DESC_UNUSABLE 0x00010000 #define GUEST_NULL_SEL 0 @@ -74,7 +70,7 @@ vm_setup_freebsd_registers(struct vmctx *vmctx, int vcpu, if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; - cr4 = CR4_PAE | CR4_VMXE; + cr4 = CR4_PAE; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; -- cgit v1.1 From f07527ef6066c09742e5ca0ff4631f9c2c68e82d Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 02:38:05 +0000 Subject: API to map an apic id to the vcpu. At the moment this is a simple mapping because the numerical values are identical. --- lib/libvmmapi/vmmapi.c | 10 ++++++++++ lib/libvmmapi/vmmapi.h | 1 + 2 files changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 077a46b..d88b694 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -314,6 +314,16 @@ vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz) } int +vm_apicid2vcpu(struct vmctx *ctx, int apicid) +{ + /* + * The apic id associated with the 'vcpu' has the same numerical value + * as the 'vcpu' itself. 
+ */ + return (apicid); +} + +int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) { struct vm_lapic_irq vmirq; diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 7b66421..a216319 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -60,6 +60,7 @@ int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); int vm_build_tables(struct vmctx *ctxt, int ncpus, void *oemtbl, int oemtblsz); +int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector); int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, -- cgit v1.1 From ac88464d24af550cd19d2c9aefd2f431e6ae89ed Mon Sep 17 00:00:00 2001 From: neel Date: Sat, 4 Aug 2012 22:46:29 +0000 Subject: Allow the 'bhyve' process to control whether or not the virtual machine sees an ioapic. Obtained from: NetApp --- lib/libvmmapi/mptable.c | 26 +++++++++++++++----------- lib/libvmmapi/mptable.h | 3 +-- lib/libvmmapi/vmmapi.c | 5 +++-- lib/libvmmapi/vmmapi.h | 4 ++-- 4 files changed, 21 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/libvmmapi/mptable.c b/lib/libvmmapi/mptable.c index fe84f35..048ea2d 100644 --- a/lib/libvmmapi/mptable.c +++ b/lib/libvmmapi/mptable.c @@ -118,20 +118,20 @@ mp_build_bus_entries(struct mpe_bus *mpeb) } -#ifdef notyet static void -mp_build_ioapic_entries(struct mpe_ioapic *mpei) +mp_build_ioapic_entries(struct mpe_ioapic *mpei, int id) { memset(mpei, 0, sizeof(*mpei)); mpei->entry_type = MP_ENTRY_IOAPIC; - mpei->ioapic_id = MPE_IOAPIC_ID; + mpei->ioapic_id = id; mpei->ioapic_version = IOAPIC_VERSION; mpei->ioapic_flags = MPE_IOAPIC_FLAG_EN; mpei->ioapic_paddr = IOAPIC_PADDR; } +#ifdef notyet static void -mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins) +mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins, int id) { int pin; @@ -147,7 +147,7 @@ mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins) memset(mpeii, 0, sizeof(*mpeii)); mpeii->entry_type = MP_ENTRY_IOINT; mpeii->src_bus_id = MPE_BUSID_ISA; - mpeii->dst_apic_id = MPE_IOAPIC_ID; + mpeii->dst_apic_id = id; /* * All default configs route IRQs from bus 0 to the first 16 pins @@ -285,7 +285,7 @@ mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch) int vm_build_mptable(struct vmctx *ctx, vm_paddr_t gpa, int len, int ncpu, - void *oemp, int oemsz) + int ioapic, void *oemp, int oemsz) { struct mp_config_hdr *mpch; char *mapaddr; @@ -313,12 +313,16 @@ vm_build_mptable(struct vmctx *ctx, vm_paddr_t gpa, int len, int ncpu, mp_build_bus_entries((struct mpe_bus*)mapaddr); mapaddr += (sizeof(struct mpe_bus)*MPE_NUM_BUSES); mpch->nr_entries += MPE_NUM_BUSES; -#if 0 - mp_build_ioapic_entries((struct mpe_ioapic*)mapaddr); - mapaddr += sizeof(struct mpe_ioapic); - mpch->nr_entries++; - mp_build_ioint_entries((struct mpe_ioint*)mapaddr, MPEII_MAX_IRQ); + if (ioapic) { + mp_build_ioapic_entries((struct mpe_ioapic*)mapaddr, ncpu + 1); + mapaddr += sizeof(struct mpe_ioapic); + mpch->nr_entries++; + } + +#ifdef notyet + mp_build_ioint_entries((struct mpe_ioint*)mapaddr, MPEII_MAX_IRQ, + ncpu + 1); mapaddr += sizeof(struct mpe_ioint)*MPEII_MAX_IRQ; mpch->nr_entries += MPEII_MAX_IRQ; diff --git a/lib/libvmmapi/mptable.h b/lib/libvmmapi/mptable.h index cad8834..51c7bc1 100644 --- a/lib/libvmmapi/mptable.h +++ b/lib/libvmmapi/mptable.h @@ -128,7 +128,6 @@ struct mpe_bus { /* * MP IO APIC Entry */ -#define MPE_IOAPIC_ID (2) #define MPE_IOAPIC_FLAG_EN (1) struct mpe_ioapic { 
uint8_t entry_type; @@ -167,5 +166,5 @@ struct mpe_lint { }; int vm_build_mptable(struct vmctx *ctxt, vm_paddr_t gpa, int len, - int ncpu, void *oemp, int oemsz); + int ncpu, int ioapic, void *oemp, int oemsz); #endif /* _MPTABLE_h_ */ diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index d88b694..5882bd2 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -306,11 +306,12 @@ vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, } int -vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz) +vm_build_tables(struct vmctx *ctxt, int ncpu, int ioapic, + void *oemtbl, int oemtblsz) { return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu, - oemtbl, oemtblsz)); + ioapic, oemtbl, oemtblsz)); } int diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index a216319..de3d4b7 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -58,8 +58,8 @@ int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid); int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); -int vm_build_tables(struct vmctx *ctxt, int ncpus, void *oemtbl, - int oemtblsz); +int vm_build_tables(struct vmctx *ctxt, int ncpus, int ioapic, + void *oemtbl, int oemtblsz); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector); -- cgit v1.1 From cc73504950eb7b5dff2dded9bedd67bc36d64641 Mon Sep 17 00:00:00 2001 From: dim Date: Sun, 19 Aug 2012 10:33:04 +0000 Subject: Vendor import of clang trunk r162107: http://llvm.org/svn/llvm-project/cfe/trunk@162107 --- lib/AST/APValue.cpp | 5 +- lib/AST/ASTContext.cpp | 68 +++- lib/AST/ASTDiagnostic.cpp | 2 +- lib/AST/CommentCommandTraits.cpp | 10 +- lib/AST/DeclCXX.cpp | 13 +- lib/AST/DeclPrinter.cpp | 56 +-- lib/AST/DeclTemplate.cpp | 3 +- lib/AST/DumpXML.cpp | 5 + lib/AST/NestedNameSpecifier.cpp | 9 +- lib/AST/RawCommentList.cpp | 21 +- lib/AST/Stmt.cpp | 23 +- lib/AST/StmtPrinter.cpp | 32 +- lib/AST/TemplateBase.cpp | 6 +- lib/Basic/Diagnostic.cpp | 6 +- lib/Basic/Targets.cpp | 3 +- lib/CodeGen/CGBuiltin.cpp | 69 +++- lib/CodeGen/CGDebugInfo.cpp | 41 +- lib/CodeGen/CGExpr.cpp | 80 ++++ lib/CodeGen/CGExprCXX.cpp | 9 +- lib/CodeGen/CGStmt.cpp | 24 +- lib/CodeGen/CGValue.h | 2 +- lib/Driver/Tools.cpp | 3 + lib/Frontend/ASTConsumers.cpp | 2 +- lib/Frontend/CacheTokens.cpp | 2 +- lib/Lex/PTHLexer.cpp | 6 +- lib/Parse/ParseDecl.cpp | 73 +++- lib/Parse/ParseStmt.cpp | 181 ++++----- lib/Rewrite/RewriteModernObjC.cpp | 9 +- lib/Rewrite/RewriteObjC.cpp | 10 +- lib/Sema/AttributeList.cpp | 2 + lib/Sema/SemaCast.cpp | 20 +- lib/Sema/SemaChecking.cpp | 420 ++++++++++++++++++++- lib/Sema/SemaCodeComplete.cpp | 1 - lib/Sema/SemaDecl.cpp | 72 +++- lib/Sema/SemaDeclAttr.cpp | 163 ++++++-- lib/Sema/SemaDeclCXX.cpp | 2 +- lib/Sema/SemaExceptionSpec.cpp | 3 +- lib/Sema/SemaExpr.cpp | 3 +- lib/Sema/SemaExprMember.cpp | 2 +- lib/Sema/SemaOverload.cpp | 2 +- lib/Sema/SemaStmt.cpp | 329 ++++++++++------ lib/Sema/SemaTemplate.cpp | 7 + lib/Sema/SemaTemplateInstantiate.cpp | 3 +- lib/Sema/SemaType.cpp | 46 ++- lib/Sema/TreeTransform.h | 19 +- lib/Serialization/ASTReader.cpp | 1 - lib/Serialization/ASTWriter.cpp | 1 - .../Checkers/CallAndMessageChecker.cpp | 12 +- .../Checkers/DynamicTypePropagation.cpp | 87 ++++- lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp | 151 +------- lib/StaticAnalyzer/Core/AnalysisManager.cpp | 45 +-- 
lib/StaticAnalyzer/Core/BugReporter.cpp | 189 ++++------ lib/StaticAnalyzer/Core/BugReporterVisitors.cpp | 2 +- lib/StaticAnalyzer/Core/CallEvent.cpp | 86 +++-- .../Core/ExprEngineCallAndReturn.cpp | 42 ++- lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 48 +-- lib/StaticAnalyzer/Core/PathDiagnostic.cpp | 18 +- lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 92 +++-- lib/StaticAnalyzer/Core/ProgramState.cpp | 7 +- lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp | 12 +- lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp | 74 +++- 61 files changed, 1869 insertions(+), 865 deletions(-) (limited to 'lib') diff --git a/lib/AST/APValue.cpp b/lib/AST/APValue.cpp index a74ef14..2d7c9bd 100644 --- a/lib/AST/APValue.cpp +++ b/lib/AST/APValue.cpp @@ -367,8 +367,7 @@ void APValue::printPretty(raw_ostream &Out, ASTContext &Ctx, QualType Ty) const{ if (const ValueDecl *VD = Base.dyn_cast()) Out << *VD; else - Base.get()->printPretty(Out, Ctx, 0, - Ctx.getPrintingPolicy()); + Base.get()->printPretty(Out, 0, Ctx.getPrintingPolicy()); if (!O.isZero()) { Out << " + " << (O / S); if (IsReference) @@ -389,7 +388,7 @@ void APValue::printPretty(raw_ostream &Out, ASTContext &Ctx, QualType Ty) const{ ElemTy = VD->getType(); } else { const Expr *E = Base.get(); - E->printPretty(Out, Ctx, 0,Ctx.getPrintingPolicy()); + E->printPretty(Out, 0, Ctx.getPrintingPolicy()); ElemTy = E->getType(); } diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index ad48dff..c021323 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -66,6 +66,12 @@ RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { if (D->isImplicit()) return NULL; + // User can not attach documentation to implicit instantiations. + if (const FunctionDecl *FD = dyn_cast(D)) { + if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) + return NULL; + } + // TODO: handle comments for function parameters properly. if (isa(D)) return NULL; @@ -145,7 +151,6 @@ RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) == SourceMgr.getLineNumber(CommentBeginDecomp.first, CommentBeginDecomp.second)) { - (*Comment)->setDecl(D); return *Comment; } } @@ -185,13 +190,13 @@ RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { if (Text.find_first_of(",;{}#@") != StringRef::npos) return NULL; - (*Comment)->setDecl(D); return *Comment; } -const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { - // If we have a 'templated' declaration for a template, adjust 'D' to - // refer to the actual template. +namespace { +/// If we have a 'templated' declaration for a template, adjust 'D' to +/// refer to the actual template. +const Decl *adjustDeclToTemplate(const Decl *D) { if (const FunctionDecl *FD = dyn_cast(D)) { if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate()) D = FTD; @@ -200,6 +205,14 @@ const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { D = CTD; } // FIXME: Alias templates? + return D; +} +} // unnamed namespace + +const RawComment *ASTContext::getRawCommentForAnyRedecl( + const Decl *D, + const Decl **OriginalDecl) const { + D = adjustDeclToTemplate(D); // Check whether we have cached a comment for this declaration already. 
{ @@ -207,13 +220,17 @@ const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { RedeclComments.find(D); if (Pos != RedeclComments.end()) { const RawCommentAndCacheFlags &Raw = Pos->second; - if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) + if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) { + if (OriginalDecl) + *OriginalDecl = Raw.getOriginalDecl(); return Raw.getRaw(); + } } } // Search for comments attached to declarations in the redeclaration chain. const RawComment *RC = NULL; + const Decl *OriginalDeclForRC = NULL; for (Decl::redecl_iterator I = D->redecls_begin(), E = D->redecls_end(); I != E; ++I) { @@ -223,16 +240,19 @@ const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { const RawCommentAndCacheFlags &Raw = Pos->second; if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) { RC = Raw.getRaw(); + OriginalDeclForRC = Raw.getOriginalDecl(); break; } } else { RC = getRawCommentForDeclNoCache(*I); + OriginalDeclForRC = *I; RawCommentAndCacheFlags Raw; if (RC) { Raw.setRaw(RC); Raw.setKind(RawCommentAndCacheFlags::FromDecl); } else Raw.setKind(RawCommentAndCacheFlags::NoCommentInDecl); + Raw.setOriginalDecl(*I); RedeclComments[*I] = Raw; if (RC) break; @@ -242,10 +262,14 @@ const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { // If we found a comment, it should be a documentation comment. assert(!RC || RC->isDocumentation()); + if (OriginalDecl) + *OriginalDecl = OriginalDeclForRC; + // Update cache for every declaration in the redeclaration chain. RawCommentAndCacheFlags Raw; Raw.setRaw(RC); Raw.setKind(RawCommentAndCacheFlags::FromRedecl); + Raw.setOriginalDecl(OriginalDeclForRC); for (Decl::redecl_iterator I = D->redecls_begin(), E = D->redecls_end(); @@ -259,11 +283,24 @@ const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { } comments::FullComment *ASTContext::getCommentForDecl(const Decl *D) const { - const RawComment *RC = getRawCommentForAnyRedecl(D); + D = adjustDeclToTemplate(D); + const Decl *Canonical = D->getCanonicalDecl(); + llvm::DenseMap::iterator Pos = + ParsedComments.find(Canonical); + if (Pos != ParsedComments.end()) + return Pos->second; + + const Decl *OriginalDecl; + const RawComment *RC = getRawCommentForAnyRedecl(D, &OriginalDecl); if (!RC) return NULL; - return RC->getParsed(*this); + if (D != OriginalDecl) + return getCommentForDecl(OriginalDecl); + + comments::FullComment *FC = RC->parse(*this, D); + ParsedComments[Canonical] = FC; + return FC; } void @@ -5437,7 +5474,8 @@ ASTContext::getQualifiedTemplateName(NestedNameSpecifier *NNS, QualifiedTemplateName *QTN = QualifiedTemplateNames.FindNodeOrInsertPos(ID, InsertPos); if (!QTN) { - QTN = new (*this,4) QualifiedTemplateName(NNS, TemplateKeyword, Template); + QTN = new (*this, llvm::alignOf()) + QualifiedTemplateName(NNS, TemplateKeyword, Template); QualifiedTemplateNames.InsertNode(QTN, InsertPos); } @@ -5464,10 +5502,12 @@ ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS, NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS); if (CanonNNS == NNS) { - QTN = new (*this,4) DependentTemplateName(NNS, Name); + QTN = new (*this, llvm::alignOf()) + DependentTemplateName(NNS, Name); } else { TemplateName Canon = getDependentTemplateName(CanonNNS, Name); - QTN = new (*this,4) DependentTemplateName(NNS, Name, Canon); + QTN = new (*this, llvm::alignOf()) + DependentTemplateName(NNS, Name, Canon); DependentTemplateName *CheckQTN = 
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos); assert(!CheckQTN && "Dependent type name canonicalization broken"); @@ -5498,10 +5538,12 @@ ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS, NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS); if (CanonNNS == NNS) { - QTN = new (*this,4) DependentTemplateName(NNS, Operator); + QTN = new (*this, llvm::alignOf()) + DependentTemplateName(NNS, Operator); } else { TemplateName Canon = getDependentTemplateName(CanonNNS, Operator); - QTN = new (*this,4) DependentTemplateName(NNS, Operator, Canon); + QTN = new (*this, llvm::alignOf()) + DependentTemplateName(NNS, Operator, Canon); DependentTemplateName *CheckQTN = DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos); diff --git a/lib/AST/ASTDiagnostic.cpp b/lib/AST/ASTDiagnostic.cpp index 35fcd41..a605f1a 100644 --- a/lib/AST/ASTDiagnostic.cpp +++ b/lib/AST/ASTDiagnostic.cpp @@ -1149,7 +1149,7 @@ class TemplateDiff { if (!E) OS << "(no argument)"; else - E->printPretty(OS, Context, 0, Policy); return; + E->printPretty(OS, 0, Policy); return; } /// PrintTemplateTemplate - Handles printing of template template arguments, diff --git a/lib/AST/CommentCommandTraits.cpp b/lib/AST/CommentCommandTraits.cpp index d8ce1f3..dc7a0bd 100644 --- a/lib/AST/CommentCommandTraits.cpp +++ b/lib/AST/CommentCommandTraits.cpp @@ -15,9 +15,9 @@ namespace comments { // TODO: tablegen -bool CommandTraits::isVerbatimBlockCommand(StringRef BeginName, +bool CommandTraits::isVerbatimBlockCommand(StringRef StartName, StringRef &EndName) const { - const char *Result = llvm::StringSwitch(BeginName) + const char *Result = llvm::StringSwitch(StartName) .Case("code", "endcode") .Case("verbatim", "endverbatim") .Case("htmlonly", "endhtmlonly") @@ -44,7 +44,7 @@ bool CommandTraits::isVerbatimBlockCommand(StringRef BeginName, I = VerbatimBlockCommands.begin(), E = VerbatimBlockCommands.end(); I != E; ++I) - if (I->BeginName == BeginName) { + if (I->StartName == StartName) { EndName = I->EndName; return true; } @@ -115,10 +115,10 @@ bool CommandTraits::isDeclarationCommand(StringRef Name) const { .Default(false); } -void CommandTraits::addVerbatimBlockCommand(StringRef BeginName, +void CommandTraits::addVerbatimBlockCommand(StringRef StartName, StringRef EndName) { VerbatimBlockCommand VBC; - VBC.BeginName = BeginName; + VBC.StartName = StartName; VBC.EndName = EndName; VerbatimBlockCommands.push_back(VBC); } diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp index eec2e9d..2f21e4c 100644 --- a/lib/AST/DeclCXX.cpp +++ b/lib/AST/DeclCXX.cpp @@ -1294,15 +1294,20 @@ static bool recursivelyOverrides(const CXXMethodDecl *DerivedMD, } CXXMethodDecl * -CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD) { +CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD, + bool MayBeBase) { if (this->getParent()->getCanonicalDecl() == RD->getCanonicalDecl()) return this; // Lookup doesn't work for destructors, so handle them separately. 
if (isa(this)) { CXXMethodDecl *MD = RD->getDestructor(); - if (MD && recursivelyOverrides(MD, this)) - return MD; + if (MD) { + if (recursivelyOverrides(MD, this)) + return MD; + if (MayBeBase && recursivelyOverrides(this, MD)) + return MD; + } return NULL; } @@ -1313,6 +1318,8 @@ CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD) { continue; if (recursivelyOverrides(MD, this)) return MD; + if (MayBeBase && recursivelyOverrides(this, MD)) + return MD; } for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), diff --git a/lib/AST/DeclPrinter.cpp b/lib/AST/DeclPrinter.cpp index aad0ca1..7f47604 100644 --- a/lib/AST/DeclPrinter.cpp +++ b/lib/AST/DeclPrinter.cpp @@ -26,7 +26,6 @@ using namespace clang; namespace { class DeclPrinter : public DeclVisitor { raw_ostream &Out; - ASTContext &Context; PrintingPolicy Policy; unsigned Indentation; bool PrintInstantiation; @@ -38,11 +37,9 @@ namespace { void Print(AccessSpecifier AS); public: - DeclPrinter(raw_ostream &Out, ASTContext &Context, - const PrintingPolicy &Policy, - unsigned Indentation = 0, - bool PrintInstantiation = false) - : Out(Out), Context(Context), Policy(Policy), Indentation(Indentation), + DeclPrinter(raw_ostream &Out, const PrintingPolicy &Policy, + unsigned Indentation = 0, bool PrintInstantiation = false) + : Out(Out), Policy(Policy), Indentation(Indentation), PrintInstantiation(PrintInstantiation) { } void VisitDeclContext(DeclContext *DC, bool Indent = true); @@ -96,7 +93,7 @@ void Decl::print(raw_ostream &Out, unsigned Indentation, void Decl::print(raw_ostream &Out, const PrintingPolicy &Policy, unsigned Indentation, bool PrintInstantiation) const { - DeclPrinter Printer(Out, getASTContext(), Policy, Indentation, PrintInstantiation); + DeclPrinter Printer(Out, Policy, Indentation, PrintInstantiation); Printer.Visit(const_cast(this)); } @@ -171,13 +168,17 @@ void DeclContext::dumpDeclContext() const { DC = DC->getParent(); ASTContext &Ctx = cast(DC)->getASTContext(); - DeclPrinter Printer(llvm::errs(), Ctx, Ctx.getPrintingPolicy(), 0); + DeclPrinter Printer(llvm::errs(), Ctx.getPrintingPolicy(), 0); Printer.VisitDeclContext(const_cast(this), /*Indent=*/false); } +void Decl::dump() const { + dump(llvm::errs()); +} + void Decl::dump(raw_ostream &Out) const { PrintingPolicy Policy = getASTContext().getPrintingPolicy(); - Policy.Dump = true; + Policy.DumpSourceManager = &getASTContext().getSourceManager(); print(Out, Policy, /*Indentation*/ 0, /*PrintInstantiation*/ true); } @@ -191,8 +192,8 @@ void DeclPrinter::prettyPrintAttributes(Decl *D) { if (D->hasAttrs()) { AttrVec &Attrs = D->getAttrs(); for (AttrVec::const_iterator i=Attrs.begin(), e=Attrs.end(); i!=e; ++i) { - Attr *A = *i; - A->printPretty(Out, Context); + Attr *A = *i; + A->printPretty(Out, Policy); } } } @@ -231,7 +232,7 @@ void DeclPrinter::VisitDeclContext(DeclContext *DC, bool Indent) { if (isa(*D)) continue; - if (!Policy.Dump) { + if (!Policy.DumpSourceManager) { // Skip over implicit declarations in pretty-printing mode. 
if (D->isImplicit()) continue; // FIXME: Ugly hack so we don't pretty-print the builtin declaration @@ -381,7 +382,7 @@ void DeclPrinter::VisitEnumConstantDecl(EnumConstantDecl *D) { Out << *D; if (Expr *Init = D->getInitExpr()) { Out << " = "; - Init->printPretty(Out, Context, 0, Policy, Indentation); + Init->printPretty(Out, 0, Policy, Indentation); } } @@ -420,7 +421,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { Proto += "("; if (FT) { llvm::raw_string_ostream POut(Proto); - DeclPrinter ParamPrinter(POut, Context, SubPolicy, Indentation); + DeclPrinter ParamPrinter(POut, SubPolicy, Indentation); for (unsigned i = 0, e = D->getNumParams(); i != e; ++i) { if (i) POut << ", "; ParamPrinter.VisitParmVarDecl(D->getParamDecl(i)); @@ -466,7 +467,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { if (FT->getExceptionSpecType() == EST_ComputedNoexcept) { Proto += "("; llvm::raw_string_ostream EOut(Proto); - FT->getNoexceptExpr()->printPretty(EOut, Context, 0, SubPolicy, + FT->getNoexceptExpr()->printPretty(EOut, 0, SubPolicy, Indentation); EOut.flush(); Proto += EOut.str(); @@ -522,7 +523,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { SimpleInit = Init; if (SimpleInit) - SimpleInit->printPretty(Out, Context, 0, Policy, Indentation); + SimpleInit->printPretty(Out, 0, Policy, Indentation); else { for (unsigned I = 0; I != NumArgs; ++I) { if (isa(Args[I])) @@ -530,7 +531,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { if (I) Out << ", "; - Args[I]->printPretty(Out, Context, 0, Policy, Indentation); + Args[I]->printPretty(Out, 0, Policy, Indentation); } } } @@ -554,7 +555,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { // This is a K&R function definition, so we need to print the // parameters. Out << '\n'; - DeclPrinter ParamPrinter(Out, Context, SubPolicy, Indentation); + DeclPrinter ParamPrinter(Out, SubPolicy, Indentation); Indentation += Policy.Indentation; for (unsigned i = 0, e = D->getNumParams(); i != e; ++i) { Indent(); @@ -565,7 +566,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { } else Out << ' '; - D->getBody()->printPretty(Out, Context, 0, SubPolicy, Indentation); + D->getBody()->printPretty(Out, 0, SubPolicy, Indentation); Out << '\n'; } } @@ -580,7 +581,7 @@ void DeclPrinter::VisitFieldDecl(FieldDecl *D) { if (D->isBitField()) { Out << " : "; - D->getBitWidth()->printPretty(Out, Context, 0, Policy, Indentation); + D->getBitWidth()->printPretty(Out, 0, Policy, Indentation); } Expr *Init = D->getInClassInitializer(); @@ -589,7 +590,7 @@ void DeclPrinter::VisitFieldDecl(FieldDecl *D) { Out << " "; else Out << " = "; - Init->printPretty(Out, Context, 0, Policy, Indentation); + Init->printPretty(Out, 0, Policy, Indentation); } prettyPrintAttributes(D); } @@ -625,7 +626,7 @@ void DeclPrinter::VisitVarDecl(VarDecl *D) { else if (D->getInitStyle() == VarDecl::CInit) { Out << " = "; } - Init->printPretty(Out, Context, 0, Policy, Indentation); + Init->printPretty(Out, 0, Policy, Indentation); if (D->getInitStyle() == VarDecl::CallInit) Out << ")"; } @@ -639,7 +640,7 @@ void DeclPrinter::VisitParmVarDecl(ParmVarDecl *D) { void DeclPrinter::VisitFileScopeAsmDecl(FileScopeAsmDecl *D) { Out << "__asm ("; - D->getAsmString()->printPretty(Out, Context, 0, Policy, Indentation); + D->getAsmString()->printPretty(Out, 0, Policy, Indentation); Out << ")"; } @@ -650,9 +651,9 @@ void DeclPrinter::VisitImportDecl(ImportDecl *D) { void DeclPrinter::VisitStaticAssertDecl(StaticAssertDecl *D) { Out << "static_assert("; - 
D->getAssertExpr()->printPretty(Out, Context, 0, Policy, Indentation); + D->getAssertExpr()->printPretty(Out, 0, Policy, Indentation); Out << ", "; - D->getMessage()->printPretty(Out, Context, 0, Policy, Indentation); + D->getMessage()->printPretty(Out, 0, Policy, Indentation); Out << ")"; } @@ -786,8 +787,7 @@ void DeclPrinter::PrintTemplateParameters( Args->get(i).print(Policy, Out); } else if (NTTP->hasDefaultArgument()) { Out << " = "; - NTTP->getDefaultArgument()->printPretty(Out, Context, 0, Policy, - Indentation); + NTTP->getDefaultArgument()->printPretty(Out, 0, Policy, Indentation); } } else if (const TemplateTemplateParmDecl *TTPD = dyn_cast(Param)) { @@ -871,7 +871,7 @@ void DeclPrinter::VisitObjCMethodDecl(ObjCMethodDecl *OMD) { if (OMD->getBody()) { Out << ' '; - OMD->getBody()->printPretty(Out, Context, 0, Policy); + OMD->getBody()->printPretty(Out, 0, Policy); Out << '\n'; } } diff --git a/lib/AST/DeclTemplate.cpp b/lib/AST/DeclTemplate.cpp index 5aebc2b..a7e8999 100644 --- a/lib/AST/DeclTemplate.cpp +++ b/lib/AST/DeclTemplate.cpp @@ -43,7 +43,8 @@ TemplateParameterList::Create(const ASTContext &C, SourceLocation TemplateLoc, unsigned NumParams, SourceLocation RAngleLoc) { unsigned Size = sizeof(TemplateParameterList) + sizeof(NamedDecl *) * NumParams; - unsigned Align = llvm::AlignOf::Alignment; + unsigned Align = std::max(llvm::alignOf(), + llvm::alignOf()); void *Mem = C.Allocate(Size, Align); return new (Mem) TemplateParameterList(TemplateLoc, LAngleLoc, Params, NumParams, RAngleLoc); diff --git a/lib/AST/DumpXML.cpp b/lib/AST/DumpXML.cpp index c1432b5..84f3fc4 100644 --- a/lib/AST/DumpXML.cpp +++ b/lib/AST/DumpXML.cpp @@ -1022,12 +1022,17 @@ struct XMLDumper : public XMLDeclVisitor, }; } +void Decl::dumpXML() const { + dump(llvm::errs()); +} + void Decl::dumpXML(raw_ostream &out) const { XMLDumper(out, getASTContext()).dispatch(const_cast(this)); } #else /* ifndef NDEBUG */ +void Decl::dumpXML() const {} void Decl::dumpXML(raw_ostream &out) const {} #endif diff --git a/lib/AST/NestedNameSpecifier.cpp b/lib/AST/NestedNameSpecifier.cpp index dbf267b..49b119b 100644 --- a/lib/AST/NestedNameSpecifier.cpp +++ b/lib/AST/NestedNameSpecifier.cpp @@ -18,6 +18,7 @@ #include "clang/AST/PrettyPrinter.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" +#include "llvm/Support/AlignOf.h" #include "llvm/Support/raw_ostream.h" #include @@ -33,7 +34,8 @@ NestedNameSpecifier::FindOrInsert(const ASTContext &Context, NestedNameSpecifier *NNS = Context.NestedNameSpecifiers.FindNodeOrInsertPos(ID, InsertPos); if (!NNS) { - NNS = new (Context, 4) NestedNameSpecifier(Mockup); + NNS = new (Context, llvm::alignOf()) + NestedNameSpecifier(Mockup); Context.NestedNameSpecifiers.InsertNode(NNS, InsertPos); } @@ -107,7 +109,9 @@ NestedNameSpecifier::Create(const ASTContext &Context, IdentifierInfo *II) { NestedNameSpecifier * NestedNameSpecifier::GlobalSpecifier(const ASTContext &Context) { if (!Context.GlobalNestedNameSpecifier) - Context.GlobalNestedNameSpecifier = new (Context, 4) NestedNameSpecifier(); + Context.GlobalNestedNameSpecifier = + new (Context, llvm::alignOf()) + NestedNameSpecifier(); return Context.GlobalNestedNameSpecifier; } @@ -630,4 +634,3 @@ NestedNameSpecifierLocBuilder::getWithLocInContext(ASTContext &Context) const { memcpy(Mem, Buffer, BufferSize); return NestedNameSpecifierLoc(Representation, Mem); } - diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp index c704cab..a5a3287 100644 --- a/lib/AST/RawCommentList.cpp +++ b/lib/AST/RawCommentList.cpp 
@@ -65,7 +65,7 @@ bool mergedCommentIsTrailingComment(StringRef Comment) { RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, bool Merged) : Range(SR), RawTextValid(false), BriefTextValid(false), - IsAlmostTrailingComment(false), + IsAttached(false), IsAlmostTrailingComment(false), BeginLineValid(false), EndLineValid(false) { // Extract raw comment text, if possible. if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { @@ -87,16 +87,6 @@ RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, } } -const Decl *RawComment::getDecl() const { - if (DeclOrParsedComment.isNull()) - return NULL; - - if (const Decl *D = DeclOrParsedComment.dyn_cast()) - return D; - - return DeclOrParsedComment.get()->getDecl(); -} - unsigned RawComment::getBeginLine(const SourceManager &SM) const { if (BeginLineValid) return BeginLine; @@ -169,7 +159,8 @@ const char *RawComment::extractBriefText(const ASTContext &Context) const { return BriefTextPtr; } -comments::FullComment *RawComment::parse(const ASTContext &Context) const { +comments::FullComment *RawComment::parse(const ASTContext &Context, + const Decl *D) const { // Make sure that RawText is valid. getRawText(Context.getSourceManager()); @@ -179,13 +170,11 @@ comments::FullComment *RawComment::parse(const ASTContext &Context) const { RawText.begin(), RawText.end()); comments::Sema S(Context.getAllocator(), Context.getSourceManager(), Context.getDiagnostics(), Traits); - S.setDecl(getDecl()); + S.setDecl(D); comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), Context.getDiagnostics(), Traits); - comments::FullComment *FC = P.parseFullComment(); - DeclOrParsedComment = FC; - return FC; + return P.parseFullComment(); } namespace { diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index d877c3f..77452c9 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -584,22 +584,27 @@ AsmStmt::AsmStmt(ASTContext &C, SourceLocation asmloc, bool issimple, } MSAsmStmt::MSAsmStmt(ASTContext &C, SourceLocation asmloc, - bool issimple, bool isvolatile, ArrayRef asmtoks, - ArrayRef lineends, StringRef asmstr, + SourceLocation lbraceloc, bool issimple, bool isvolatile, + ArrayRef asmtoks, ArrayRef inputs, + ArrayRef outputs, StringRef asmstr, ArrayRef clobbers, SourceLocation endloc) - : Stmt(MSAsmStmtClass), AsmLoc(asmloc), EndLoc(endloc), + : Stmt(MSAsmStmtClass), AsmLoc(asmloc), LBraceLoc(lbraceloc), EndLoc(endloc), AsmStr(asmstr.str()), IsSimple(issimple), IsVolatile(isvolatile), - NumAsmToks(asmtoks.size()), NumLineEnds(lineends.size()), - NumClobbers(clobbers.size()) { + NumAsmToks(asmtoks.size()), NumInputs(inputs.size()), + NumOutputs(outputs.size()), NumClobbers(clobbers.size()) { + + unsigned NumExprs = NumOutputs + NumInputs; + + Names = new (C) IdentifierInfo*[NumExprs]; + for (unsigned i = 0, e = NumOutputs; i != e; ++i) + Names[i] = outputs[i]; + for (unsigned i = NumOutputs, e = NumExprs; i != e; ++i) + Names[i] = inputs[i]; AsmToks = new (C) Token[NumAsmToks]; for (unsigned i = 0, e = NumAsmToks; i != e; ++i) AsmToks[i] = asmtoks[i]; - LineEnds = new (C) unsigned[NumLineEnds]; - for (unsigned i = 0, e = NumLineEnds; i != e; ++i) - LineEnds[i] = lineends[i]; - Clobbers = new (C) StringRef[NumClobbers]; for (unsigned i = 0, e = NumClobbers; i != e; ++i) { // FIXME: Avoid the allocation/copy if at all possible. 
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp index 2f7cb55..c0960ce 100644 --- a/lib/AST/StmtPrinter.cpp +++ b/lib/AST/StmtPrinter.cpp @@ -30,17 +30,15 @@ using namespace clang; namespace { class StmtPrinter : public StmtVisitor { raw_ostream &OS; - ASTContext &Context; unsigned IndentLevel; clang::PrinterHelper* Helper; PrintingPolicy Policy; public: - StmtPrinter(raw_ostream &os, ASTContext &C, PrinterHelper* helper, + StmtPrinter(raw_ostream &os, PrinterHelper* helper, const PrintingPolicy &Policy, unsigned Indentation = 0) - : OS(os), Context(C), IndentLevel(Indentation), Helper(helper), - Policy(Policy) {} + : OS(os), IndentLevel(Indentation), Helper(helper), Policy(Policy) {} void PrintStmt(Stmt *S) { PrintStmt(S, Policy.Indentation); @@ -181,7 +179,7 @@ void StmtPrinter::VisitAttributedStmt(AttributedStmt *Node) { first = false; } // TODO: check this - (*it)->printPretty(OS, Context); + (*it)->printPretty(OS, Policy); } OS << "]] "; PrintStmt(Node->getSubStmt(), 0); @@ -432,7 +430,12 @@ void StmtPrinter::VisitAsmStmt(AsmStmt *Node) { void StmtPrinter::VisitMSAsmStmt(MSAsmStmt *Node) { // FIXME: Implement MS style inline asm statement printer. - Indent() << "asm ()"; + Indent() << "__asm "; + if (Node->hasBraces()) + OS << "{\n"; + OS << *(Node->getAsmString()) << "\n"; + if (Node->hasBraces()) + Indent() << "}\n"; } void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) { @@ -1390,7 +1393,7 @@ void StmtPrinter::VisitCXXNewExpr(CXXNewExpr *E) { std::string TypeS; if (Expr *Size = E->getArraySize()) { llvm::raw_string_ostream s(TypeS); - Size->printPretty(s, Context, Helper, Policy); + Size->printPretty(s, Helper, Policy); s.flush(); TypeS = "[" + TypeS + "]"; } @@ -1799,13 +1802,12 @@ void StmtPrinter::VisitAsTypeExpr(AsTypeExpr *Node) { // Stmt method implementations //===----------------------------------------------------------------------===// -void Stmt::dumpPretty(ASTContext& Context) const { - printPretty(llvm::errs(), Context, 0, - PrintingPolicy(Context.getLangOpts())); +void Stmt::dumpPretty(ASTContext &Context) const { + printPretty(llvm::errs(), 0, PrintingPolicy(Context.getLangOpts())); } -void Stmt::printPretty(raw_ostream &OS, ASTContext& Context, - PrinterHelper* Helper, +void Stmt::printPretty(raw_ostream &OS, + PrinterHelper *Helper, const PrintingPolicy &Policy, unsigned Indentation) const { if (this == 0) { @@ -1813,12 +1815,12 @@ void Stmt::printPretty(raw_ostream &OS, ASTContext& Context, return; } - if (Policy.Dump && &Context) { - dump(OS, Context.getSourceManager()); + if (Policy.DumpSourceManager) { + dump(OS, *Policy.DumpSourceManager); return; } - StmtPrinter P(OS, Context, Helper, Policy, Indentation); + StmtPrinter P(OS, Helper, Policy, Indentation); P.Visit(const_cast(this)); } diff --git a/lib/AST/TemplateBase.cpp b/lib/AST/TemplateBase.cpp index f8dd396..95ff4ed 100644 --- a/lib/AST/TemplateBase.cpp +++ b/lib/AST/TemplateBase.cpp @@ -556,8 +556,7 @@ const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, const ASTTemplateArgumentListInfo * ASTTemplateArgumentListInfo::Create(ASTContext &C, const TemplateArgumentListInfo &List) { - std::size_t size = sizeof(CXXDependentScopeMemberExpr) + - ASTTemplateArgumentListInfo::sizeFor(List.size()); + std::size_t size = ASTTemplateArgumentListInfo::sizeFor(List.size()); void *Mem = C.Allocate(size, llvm::alignOf()); ASTTemplateArgumentListInfo *TAI = new (Mem) ASTTemplateArgumentListInfo(); TAI->initializeFrom(List); @@ -642,6 +641,7 @@ 
ASTTemplateKWAndArgsInfo::initializeFrom(SourceLocation TemplateKWLoc) { std::size_t ASTTemplateKWAndArgsInfo::sizeFor(unsigned NumTemplateArgs) { // Add space for the template keyword location. + // FIXME: There's room for this in the padding before the template args in + // 64-bit builds. return Base::sizeFor(NumTemplateArgs) + sizeof(SourceLocation); } - diff --git a/lib/Basic/Diagnostic.cpp b/lib/Basic/Diagnostic.cpp index e689502..8065b2d 100644 --- a/lib/Basic/Diagnostic.cpp +++ b/lib/Basic/Diagnostic.cpp @@ -145,6 +145,9 @@ DiagnosticsEngine::GetDiagStatePointForLoc(SourceLocation L) const { assert(DiagStatePoints.front().Loc.isInvalid() && "Should have created a DiagStatePoint for command-line"); + if (!SourceMgr) + return DiagStatePoints.end() - 1; + FullSourceLoc Loc(L, *SourceMgr); if (Loc.isInvalid()) return DiagStatePoints.end() - 1; @@ -167,8 +170,9 @@ void DiagnosticsEngine::setDiagnosticMapping(diag::kind Diag, diag::Mapping Map, (Map == diag::MAP_FATAL || Map == diag::MAP_ERROR)) && "Cannot map errors into warnings!"); assert(!DiagStatePoints.empty()); + assert((L.isInvalid() || SourceMgr) && "No SourceMgr for valid location"); - FullSourceLoc Loc(L, *SourceMgr); + FullSourceLoc Loc = SourceMgr? FullSourceLoc(L, *SourceMgr) : FullSourceLoc(); FullSourceLoc LastStateChangePos = DiagStatePoints.back().Loc; // Don't allow a mapping to a warning override an error/fatal mapping. if (Map == diag::MAP_WARNING) { diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 883864f..1d495f1 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -3083,9 +3083,8 @@ public: unsigned &NumAliases) const; virtual bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &Info) const { - // FIXME: Check if this is complete switch (*Name) { - default: + default: break; case 'l': // r0-r7 case 'h': // r8-r15 case 'w': // VFP Floating point register single precision diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 65c782e..59ed313 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -229,6 +229,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Result); } + + case Builtin::BI__builtin_conj: + case Builtin::BI__builtin_conjf: + case Builtin::BI__builtin_conjl: { + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + Value *Real = ComplexVal.first; + Value *Imag = ComplexVal.second; + Value *Zero = + Imag->getType()->isFPOrFPVectorTy() + ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) + : llvm::Constant::getNullValue(Imag->getType()); + + Imag = Builder.CreateFSub(Zero, Imag, "sub"); + return RValue::getComplex(std::make_pair(Real, Imag)); + } + case Builtin::BI__builtin_creal: + case Builtin::BI__builtin_crealf: + case Builtin::BI__builtin_creall: { + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + return RValue::get(ComplexVal.first); + } + + case Builtin::BI__builtin_cimag: + case Builtin::BI__builtin_cimagf: + case Builtin::BI__builtin_cimagl: { + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + return RValue::get(ComplexVal.second); + } + case Builtin::BI__builtin_ctzs: case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: @@ -1720,8 +1749,29 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops.push_back(GetPointeeAlignmentValue(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty), Ops, "vld1"); - case ARM::BI__builtin_neon_vld1_lane_v: - case ARM::BI__builtin_neon_vld1q_lane_v: { + case ARM::BI__builtin_neon_vld1q_lane_v: + // Handle 64-bit integer elements as a special case. Use shuffles of + // one-element vectors to avoid poor code for i64 in the backend. + if (VTy->getElementType()->isIntegerTy(64)) { + // Extract the other lane. + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + int Lane = cast(Ops[2])->getZExtValue(); + Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); + Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); + // Load the value as a one-element vector. + Ty = llvm::VectorType::get(VTy->getElementType(), 1); + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); + Value *Ld = Builder.CreateCall2(F, Ops[0], + GetPointeeAlignmentValue(E->getArg(0))); + // Combine them. + SmallVector Indices; + Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); + Indices.push_back(ConstantInt::get(Int32Ty, Lane)); + SV = llvm::ConstantVector::get(Indices); + return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); + } + // fall through + case ARM::BI__builtin_neon_vld1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); @@ -2086,8 +2136,19 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops.push_back(GetPointeeAlignmentValue(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty), Ops, ""); - case ARM::BI__builtin_neon_vst1_lane_v: - case ARM::BI__builtin_neon_vst1q_lane_v: { + case ARM::BI__builtin_neon_vst1q_lane_v: + // Handle 64-bit integer elements as a special case. Use a shuffle to get + // a one-element vector and avoid poor code for i64 in the backend. 
+ if (VTy->getElementType()->isIntegerTy(64)) { + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Value *SV = llvm::ConstantVector::get(cast(Ops[2])); + Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); + Ops[2] = GetPointeeAlignmentValue(E->getArg(0)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, + Ops[1]->getType()), Ops); + } + // fall through + case ARM::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 00127ac..fd1c7a3 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -94,8 +94,10 @@ llvm::DIDescriptor CGDebugInfo::getContextDescriptor(const Decl *Context) { llvm::DenseMap::iterator I = RegionMap.find(Context); - if (I != RegionMap.end()) - return llvm::DIDescriptor(dyn_cast_or_null(&*I->second)); + if (I != RegionMap.end()) { + llvm::Value *V = I->second; + return llvm::DIDescriptor(dyn_cast_or_null(V)); + } // Check namespace. if (const NamespaceDecl *NSDecl = dyn_cast(Context)) @@ -227,8 +229,8 @@ llvm::DIFile CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (it != DIFileCache.end()) { // Verify that the information still exists. - if (&*it->second) - return llvm::DIFile(cast(it->second)); + if (llvm::Value *V = it->second) + return llvm::DIFile(cast(V)); } llvm::DIFile F = DBuilder.createFile(PLoc.getFilename(), getCurrentDirname()); @@ -525,8 +527,10 @@ llvm::DIDescriptor CGDebugInfo::createContextChain(const Decl *Context) { // See if we already have the parent. llvm::DenseMap::iterator I = RegionMap.find(Context); - if (I != RegionMap.end()) - return llvm::DIDescriptor(dyn_cast_or_null(&*I->second)); + if (I != RegionMap.end()) { + llvm::Value *V = I->second; + return llvm::DIDescriptor(dyn_cast_or_null(V)); + } // Check namespace. if (const NamespaceDecl *NSDecl = dyn_cast(Context)) @@ -1660,8 +1664,8 @@ llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) { TypeCache.find(Ty.getAsOpaquePtr()); if (it != TypeCache.end()) { // Verify that the debug info still exists. - if (&*it->second) - return llvm::DIType(cast(it->second)); + if (llvm::Value *V = it->second) + return llvm::DIType(cast(V)); } return llvm::DIType(); @@ -1679,8 +1683,8 @@ llvm::DIType CGDebugInfo::getCompletedTypeOrNull(QualType Ty) { CompletedTypeCache.find(Ty.getAsOpaquePtr()); if (it != CompletedTypeCache.end()) { // Verify that the debug info still exists. 
- if (&*it->second) - return llvm::DIType(cast(it->second)); + if (llvm::Value *V = it->second) + return llvm::DIType(cast(V)); } return llvm::DIType(); @@ -1942,7 +1946,8 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DenseMap::iterator MI = SPCache.find(FD->getCanonicalDecl()); if (MI != SPCache.end()) { - llvm::DISubprogram SP(dyn_cast_or_null(&*MI->second)); + llvm::Value *V = MI->second; + llvm::DISubprogram SP(dyn_cast_or_null(V)); if (SP.isSubprogram() && !llvm::DISubprogram(SP).isDefinition()) return SP; } @@ -1953,7 +1958,8 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DenseMap::iterator MI = SPCache.find(NextFD->getCanonicalDecl()); if (MI != SPCache.end()) { - llvm::DISubprogram SP(dyn_cast_or_null(&*MI->second)); + llvm::Value *V = MI->second; + llvm::DISubprogram SP(dyn_cast_or_null(V)); if (SP.isSubprogram() && !llvm::DISubprogram(SP).isDefinition()) return SP; } @@ -2013,7 +2019,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, llvm::DenseMap::iterator FI = SPCache.find(FD->getCanonicalDecl()); if (FI != SPCache.end()) { - llvm::DIDescriptor SP(dyn_cast_or_null(&*FI->second)); + llvm::Value *V = FI->second; + llvm::DIDescriptor SP(dyn_cast_or_null(V)); if (SP.isSubprogram() && llvm::DISubprogram(SP).isDefinition()) { llvm::MDNode *SPN = SP; LexicalBlockStack.push_back(SPN); @@ -2701,15 +2708,15 @@ void CGDebugInfo::finalize(void) { = ReplaceMap.begin(), VE = ReplaceMap.end(); VI != VE; ++VI) { llvm::DIType Ty, RepTy; // Verify that the debug info still exists. - if (&*VI->second) - Ty = llvm::DIType(cast(VI->second)); + if (llvm::Value *V = VI->second) + Ty = llvm::DIType(cast(V)); llvm::DenseMap::iterator it = TypeCache.find(VI->first); if (it != TypeCache.end()) { // Verify that the debug info still exists. - if (&*it->second) - RepTy = llvm::DIType(cast(it->second)); + if (llvm::Value *V = it->second) + RepTy = llvm::DIType(cast(V)); } if (Ty.Verify() && Ty.isForwardDecl() && RepTy.Verify()) { diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index ecee7b4..1fe4c18 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -938,6 +938,50 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, unsigned Alignment, QualType Ty, llvm::MDNode *TBAAInfo) { + + // For better performance, handle vector loads differently. + if (Ty->isVectorType()) { + llvm::Value *V; + const llvm::Type *EltTy = + cast(Addr->getType())->getElementType(); + + const llvm::VectorType *VTy = cast(EltTy); + + // Handle vectors of size 3, like size 4 for better performance. + if (VTy->getNumElements() == 3) { + + // Bitcast to vec4 type. + llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(), + 4); + llvm::PointerType *ptVec4Ty = + llvm::PointerType::get(vec4Ty, + (cast( + Addr->getType()))->getAddressSpace()); + llvm::Value *Cast = Builder.CreateBitCast(Addr, ptVec4Ty, + "castToVec4"); + // Now load value. + llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4"); + + // Shuffle vector to get vec3. 
+ llvm::SmallVector Mask; + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(getLLVMContext()), + 0)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(getLLVMContext()), + 1)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(getLLVMContext()), + 2)); + + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + V = Builder.CreateShuffleVector(LoadVal, + llvm::UndefValue::get(vec4Ty), + MaskV, "extractVec"); + return EmitFromMemory(V, Ty); + } + } + llvm::LoadInst *Load = Builder.CreateLoad(Addr); if (Volatile) Load->setVolatile(true); @@ -984,6 +1028,42 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr, QualType Ty, llvm::MDNode *TBAAInfo, bool isInit) { + + // Handle vectors differently to get better performance. + if (Ty->isVectorType()) { + llvm::Type *SrcTy = Value->getType(); + llvm::VectorType *VecTy = cast(SrcTy); + // Handle vec3 special. + if (VecTy->getNumElements() == 3) { + llvm::LLVMContext &VMContext = getLLVMContext(); + + // Our source is a vec3, do a shuffle vector to make it a vec4. + llvm::SmallVector Mask; + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(VMContext), + 0)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(VMContext), + 1)); + Mask.push_back(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(VMContext), + 2)); + Mask.push_back(llvm::UndefValue::get(llvm::Type::getInt32Ty(VMContext))); + + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + Value = Builder.CreateShuffleVector(Value, + llvm::UndefValue::get(VecTy), + MaskV, "extractVec"); + SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + } + llvm::PointerType *DstPtr = cast(Addr->getType()); + if (DstPtr->getElementType() != SrcTy) { + llvm::Type *MemTy = + llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace()); + Addr = Builder.CreateBitCast(Addr, MemTy, "storetmp"); + } + } + Value = EmitToMemory(Value, Ty); llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 7c2c9f1..31ea1b5 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -123,7 +123,14 @@ static bool canDevirtualizeMemberFunctionCalls(ASTContext &Context, return false; } - + + // We can devirtualize calls on an object accessed by a class member access + // expression, since by C++11 [basic.life]p6 we know that it can't refer to + // a derived class object constructed in the same location. + if (const MemberExpr *ME = dyn_cast(Base)) + if (const ValueDecl *VD = dyn_cast(ME->getMemberDecl())) + return VD->getType()->isRecordType(); + // We can always devirtualize calls on temporary object expressions. 
if (isa(Base)) return true; diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 467c779..d78908d 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -1691,14 +1691,36 @@ void CodeGenFunction::EmitMSAsmStmt(const MSAsmStmt &S) { std::vector Args; std::vector ArgTypes; + std::string Constraints; + + // Clobbers + for (unsigned i = 0, e = S.getNumClobbers(); i != e; ++i) { + StringRef Clobber = S.getClobber(i); + + if (Clobber != "memory" && Clobber != "cc") + Clobber = Target.getNormalizedGCCRegisterName(Clobber); + + if (i != 0) + Constraints += ','; + + Constraints += "~{"; + Constraints += Clobber; + Constraints += '}'; + } + // Add machine specific clobbers std::string MachineClobbers = Target.getClobbers(); + if (!MachineClobbers.empty()) { + if (!Constraints.empty()) + Constraints += ','; + Constraints += MachineClobbers; + } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, ArgTypes, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, *S.getAsmString(), MachineClobbers, true); + llvm::InlineAsm::get(FTy, *S.getAsmString(), Constraints, true); llvm::CallInst *Result = Builder.CreateCall(IA, Args); Result->addAttribute(~0, llvm::Attribute::NoUnwind); Result->addAttribute(~0, llvm::Attribute::IANSDialect); diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index a46f313..c2b8e4d 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -128,7 +128,7 @@ class LValue { // The alignment to use when accessing this lvalue. (For vector elements, // this is the alignment of the whole vector.) - unsigned short Alignment; + int64_t Alignment; // objective-c's ivar bool Ivar:1; diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp index b4234cf..ed67f7b 100644 --- a/lib/Driver/Tools.cpp +++ b/lib/Driver/Tools.cpp @@ -5723,6 +5723,9 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(Plugin)); } + if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) + CmdArgs.push_back("--no-demangle"); + AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs); if (D.CCCIsCXX && diff --git a/lib/Frontend/ASTConsumers.cpp b/lib/Frontend/ASTConsumers.cpp index bb1a4e6..0f0d835 100644 --- a/lib/Frontend/ASTConsumers.cpp +++ b/lib/Frontend/ASTConsumers.cpp @@ -89,7 +89,7 @@ namespace { class ASTDeclNodeLister : public ASTConsumer, public RecursiveASTVisitor { - typedef RecursiveASTVisitor base; + typedef RecursiveASTVisitor base; public: ASTDeclNodeLister(raw_ostream *Out = NULL) diff --git a/lib/Frontend/CacheTokens.cpp b/lib/Frontend/CacheTokens.cpp index 58a6b8d..3e66613 100644 --- a/lib/Frontend/CacheTokens.cpp +++ b/lib/Frontend/CacheTokens.cpp @@ -447,7 +447,7 @@ Offset PTHWriter::EmitCachedSpellings() { void PTHWriter::GeneratePTH(const std::string &MainFile) { // Generate the prologue. - Out << "cfe-pth"; + Out << "cfe-pth" << '\0'; Emit32(PTHManager::Version); // Leave 4 words for the prologue. diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp index f104f96..67738e9 100644 --- a/lib/Lex/PTHLexer.cpp +++ b/lib/Lex/PTHLexer.cpp @@ -452,14 +452,14 @@ PTHManager *PTHManager::Create(const std::string &file, const unsigned char *BufEnd = (unsigned char*)File->getBufferEnd(); // Check the prologue of the file. 
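
The offset changes below follow from sizeof counting the terminating NUL of a string literal, which the writer now emits explicitly; a minimal check:

    #include <cstdio>

    int main() {
        // sizeof a string literal counts the terminating NUL.
        printf("%zu\n", sizeof("cfe-pth")); // prints 8
        // The writer now emits that '\0' too, so the reader compares all
        // 8 bytes and finds the version word at offset 8 instead of 7.
        return 0;
    }
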
- if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 3 + 4) || - memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) { + if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) || + memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) { Diags.Report(diag::err_invalid_pth_file) << file; return 0; } // Read the PTH version. - const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1); + const unsigned char *p = BufBeg + (sizeof("cfe-pth")); unsigned Version = ReadLE32(p); if (Version < PTHManager::Version) { diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index b830d9c..cb865cc 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -68,7 +68,6 @@ static bool isAttributeLateParsed(const IdentifierInfo &II) { .Default(false); } - /// ParseGNUAttributes - Parse a non-empty attributes list. /// /// [GNU] attributes: @@ -193,6 +192,11 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName, ParseThreadSafetyAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc); return; } + // Type safety attributes have their own grammar. + if (AttrName->isStr("type_tag_for_datatype")) { + ParseTypeTagForDatatypeAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc); + return; + } ConsumeParen(); // ignore the left paren loc for now @@ -866,7 +870,8 @@ void Parser::ParseLexedAttributes(ParsingClass &Class) { void Parser::ParseLexedAttributeList(LateParsedAttrList &LAs, Decl *D, bool EnterScope, bool OnDefinition) { for (unsigned i = 0, ni = LAs.size(); i < ni; ++i) { - LAs[i]->addDecl(D); + if (D) + LAs[i]->addDecl(D); ParseLexedAttribute(*LAs[i], EnterScope, OnDefinition); delete LAs[i]; } @@ -1019,6 +1024,70 @@ void Parser::ParseThreadSafetyAttribute(IdentifierInfo &AttrName, *EndLoc = T.getCloseLocation(); } +void Parser::ParseTypeTagForDatatypeAttribute(IdentifierInfo &AttrName, + SourceLocation AttrNameLoc, + ParsedAttributes &Attrs, + SourceLocation *EndLoc) { + assert(Tok.is(tok::l_paren) && "Attribute arg list not starting with '('"); + + BalancedDelimiterTracker T(*this, tok::l_paren); + T.consumeOpen(); + + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + T.skipToEnd(); + return; + } + IdentifierInfo *ArgumentKind = Tok.getIdentifierInfo(); + SourceLocation ArgumentKindLoc = ConsumeToken(); + + if (Tok.isNot(tok::comma)) { + Diag(Tok, diag::err_expected_comma); + T.skipToEnd(); + return; + } + ConsumeToken(); + + SourceRange MatchingCTypeRange; + TypeResult MatchingCType = ParseTypeName(&MatchingCTypeRange); + if (MatchingCType.isInvalid()) { + T.skipToEnd(); + return; + } + + bool LayoutCompatible = false; + bool MustBeNull = false; + while (Tok.is(tok::comma)) { + ConsumeToken(); + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_expected_ident); + T.skipToEnd(); + return; + } + IdentifierInfo *Flag = Tok.getIdentifierInfo(); + if (Flag->isStr("layout_compatible")) + LayoutCompatible = true; + else if (Flag->isStr("must_be_null")) + MustBeNull = true; + else { + Diag(Tok, diag::err_type_safety_unknown_flag) << Flag; + T.skipToEnd(); + return; + } + ConsumeToken(); // consume flag + } + + if (!T.consumeClose()) { + Attrs.addNewTypeTagForDatatype(&AttrName, AttrNameLoc, 0, AttrNameLoc, + ArgumentKind, ArgumentKindLoc, + MatchingCType.release(), LayoutCompatible, + MustBeNull, AttributeList::AS_GNU); + } + + if (EndLoc) + *EndLoc = T.getCloseLocation(); +} + /// DiagnoseProhibitedCXX11Attribute - We have found the opening square brackets /// of a C++11 attribute-specifier in a location where an attribute is not /// permitted. 
By C++11 [dcl.attr.grammar]p6, this is ill-formed. Diagnose this diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp index d2e4309..df9b996 100644 --- a/lib/Parse/ParseStmt.cpp +++ b/lib/Parse/ParseStmt.cpp @@ -1495,8 +1495,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { StmtResult ForEachStmt; if (ForRange) { - ForRangeStmt = Actions.ActOnCXXForRangeStmt(ForLoc, T.getOpenLocation(), - FirstPart.take(), + ForRangeStmt = Actions.ActOnCXXForRangeStmt(ForLoc, FirstPart.take(), ForRangeInit.ColonLoc, ForRangeInit.RangeExpr.get(), T.getCloseLocation()); @@ -1505,7 +1504,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { // Similarly, we need to do the semantic analysis for a for-range // statement immediately in order to close over temporaries correctly. } else if (ForEach) { - ForEachStmt = Actions.ActOnObjCForCollectionStmt(ForLoc, T.getOpenLocation(), + ForEachStmt = Actions.ActOnObjCForCollectionStmt(ForLoc, FirstPart.take(), Collection.take(), T.getCloseLocation()); @@ -1657,112 +1656,98 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { SourceManager &SrcMgr = PP.getSourceManager(); SourceLocation EndLoc = AsmLoc; SmallVector AsmToks; - SmallVector LineEnds; - do { - bool InBraces = false; - unsigned short savedBraceCount = 0; - bool InAsmComment = false; - FileID FID; - unsigned LineNo = 0; - unsigned NumTokensRead = 0; - SourceLocation LBraceLoc; - - if (Tok.is(tok::l_brace)) { - // Braced inline asm: consume the opening brace. - InBraces = true; - savedBraceCount = BraceCount; - EndLoc = LBraceLoc = ConsumeBrace(); - ++NumTokensRead; - } else { - // Single-line inline asm; compute which line it is on. - std::pair ExpAsmLoc = - SrcMgr.getDecomposedExpansionLoc(EndLoc); - FID = ExpAsmLoc.first; - LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second); - } - SourceLocation TokLoc = Tok.getLocation(); - do { - // If we hit EOF, we're done, period. - if (Tok.is(tok::eof)) - break; + bool InBraces = false; + unsigned short savedBraceCount = 0; + bool InAsmComment = false; + FileID FID; + unsigned LineNo = 0; + unsigned NumTokensRead = 0; + SourceLocation LBraceLoc; + + if (Tok.is(tok::l_brace)) { + // Braced inline asm: consume the opening brace. + InBraces = true; + savedBraceCount = BraceCount; + EndLoc = LBraceLoc = ConsumeBrace(); + ++NumTokensRead; + } else { + // Single-line inline asm; compute which line it is on. + std::pair ExpAsmLoc = + SrcMgr.getDecomposedExpansionLoc(EndLoc); + FID = ExpAsmLoc.first; + LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second); + } - // The asm keyword is a statement separator, so multiple asm statements - // are allowed. - if (!InAsmComment && Tok.is(tok::kw_asm)) - break; + SourceLocation TokLoc = Tok.getLocation(); + do { + // If we hit EOF, we're done, period. + if (Tok.is(tok::eof)) + break; - if (!InAsmComment && Tok.is(tok::semi)) { - // A semicolon in an asm is the start of a comment. - InAsmComment = true; - if (InBraces) { - // Compute which line the comment is on. - std::pair ExpSemiLoc = - SrcMgr.getDecomposedExpansionLoc(TokLoc); - FID = ExpSemiLoc.first; - LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second); - } - } else if (!InBraces || InAsmComment) { - // If end-of-line is significant, check whether this token is on a - // new line. - std::pair ExpLoc = - SrcMgr.getDecomposedExpansionLoc(TokLoc); - if (ExpLoc.first != FID || - SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) { - // If this is a single-line __asm, we're done. 
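
The line and comment bookkeeping reflects MS inline asm being line-oriented; a hedged sketch of the accepted forms (MSVC syntax, x86-only, requires MS extensions):

    void f() {
        __asm mov eax, 0    ; a semicolon starts a comment to end of line
        __asm {
            mov ebx, eax    ; in braces, too, the comment ends at newline
        }
    }
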
- if (!InBraces) - break; - // We're no longer in a comment. - InAsmComment = false; - } else if (!InAsmComment && Tok.is(tok::r_brace)) { - // Single-line asm always ends when a closing brace is seen. - // FIXME: This is compatible with Apple gcc's -fasm-blocks; what - // does MSVC do here? - break; - } + if (!InAsmComment && Tok.is(tok::semi)) { + // A semicolon in an asm is the start of a comment. + InAsmComment = true; + if (InBraces) { + // Compute which line the comment is on. + std::pair ExpSemiLoc = + SrcMgr.getDecomposedExpansionLoc(TokLoc); + FID = ExpSemiLoc.first; + LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second); } - if (!InAsmComment && InBraces && Tok.is(tok::r_brace) && - BraceCount == (savedBraceCount + 1)) { - // Consume the closing brace, and finish - EndLoc = ConsumeBrace(); + } else if (!InBraces || InAsmComment) { + // If end-of-line is significant, check whether this token is on a + // new line. + std::pair ExpLoc = + SrcMgr.getDecomposedExpansionLoc(TokLoc); + if (ExpLoc.first != FID || + SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) { + // If this is a single-line __asm, we're done. + if (!InBraces) + break; + // We're no longer in a comment. + InAsmComment = false; + } else if (!InAsmComment && Tok.is(tok::r_brace)) { + // Single-line asm always ends when a closing brace is seen. + // FIXME: This is compatible with Apple gcc's -fasm-blocks; what + // does MSVC do here? break; } - - // Consume the next token; make sure we don't modify the brace count etc. - // if we are in a comment. - EndLoc = TokLoc; - if (InAsmComment) - PP.Lex(Tok); - else { - AsmToks.push_back(Tok); - ConsumeAnyToken(); - } - TokLoc = Tok.getLocation(); - ++NumTokensRead; - } while (1); - - LineEnds.push_back(AsmToks.size()); - - if (InBraces && BraceCount != savedBraceCount) { - // __asm without closing brace (this can happen at EOF). - Diag(Tok, diag::err_expected_rbrace); - Diag(LBraceLoc, diag::note_matching) << "{"; - return StmtError(); - } else if (NumTokensRead == 0) { - // Empty __asm. - Diag(Tok, diag::err_expected_lbrace); - return StmtError(); } - // Multiple adjacent asm's form together into a single asm statement - // in the AST. - if (!Tok.is(tok::kw_asm)) + if (!InAsmComment && InBraces && Tok.is(tok::r_brace) && + BraceCount == (savedBraceCount + 1)) { + // Consume the closing brace, and finish + EndLoc = ConsumeBrace(); break; - EndLoc = ConsumeToken(); + } + + // Consume the next token; make sure we don't modify the brace count etc. + // if we are in a comment. + EndLoc = TokLoc; + if (InAsmComment) + PP.Lex(Tok); + else { + AsmToks.push_back(Tok); + ConsumeAnyToken(); + } + TokLoc = Tok.getLocation(); + ++NumTokensRead; } while (1); + if (InBraces && BraceCount != savedBraceCount) { + // __asm without closing brace (this can happen at EOF). + Diag(Tok, diag::err_expected_rbrace); + Diag(LBraceLoc, diag::note_matching) << "{"; + return StmtError(); + } else if (NumTokensRead == 0) { + // Empty __asm. + Diag(Tok, diag::err_expected_lbrace); + return StmtError(); + } + // FIXME: We should be passing source locations for better diagnostics. - return Actions.ActOnMSAsmStmt(AsmLoc, llvm::makeArrayRef(AsmToks), - llvm::makeArrayRef(LineEnds), EndLoc); + return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLoc, + llvm::makeArrayRef(AsmToks), EndLoc); } /// ParseAsmStatement - Parse a GNU extended asm statement. 
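
For comparison, a GNU extended asm statement declares its clobbers explicitly, and they are lowered to the same "~{reg}" LLVM constraint syntax that EmitMSAsmStmt above now emits; a minimal x86-only example:

    // rdtsc writes eax:edx, so edx is declared clobbered and is lowered
    // to the "~{edx}" LLVM constraint.
    static unsigned read_tsc_low(void) {
        unsigned lo;
        __asm__ __volatile__("rdtsc" : "=a"(lo) : : "edx");
        return lo;
    }
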
diff --git a/lib/Rewrite/RewriteModernObjC.cpp b/lib/Rewrite/RewriteModernObjC.cpp index 9f42fca..dcd003f 100644 --- a/lib/Rewrite/RewriteModernObjC.cpp +++ b/lib/Rewrite/RewriteModernObjC.cpp @@ -241,7 +241,7 @@ namespace { // Get the new text. std::string SStr; llvm::raw_string_ostream S(SStr); - New->printPretty(S, *Context, 0, PrintingPolicy(LangOpts)); + New->printPretty(S, 0, PrintingPolicy(LangOpts)); const std::string &Str = S.str(); // If replacement succeeded or warning disabled return with no warning. @@ -2549,8 +2549,7 @@ Stmt *RewriteModernObjC::RewriteObjCStringLiteral(ObjCStringLiteral *Exp) { // The pretty printer for StringLiteral handles escape characters properly. std::string prettyBufS; llvm::raw_string_ostream prettyBuf(prettyBufS); - Exp->getString()->printPretty(prettyBuf, *Context, 0, - PrintingPolicy(LangOpts)); + Exp->getString()->printPretty(prettyBuf, 0, PrintingPolicy(LangOpts)); Preamble += prettyBuf.str(); Preamble += ","; Preamble += utostr(Exp->getString()->getByteLength()) + "};\n"; @@ -4341,7 +4340,7 @@ void RewriteModernObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, std::string SStr; llvm::raw_string_ostream constructorExprBuf(SStr); - GlobalConstructionExp->printPretty(constructorExprBuf, *Context, 0, + GlobalConstructionExp->printPretty(constructorExprBuf, 0, PrintingPolicy(LangOpts)); globalBuf += constructorExprBuf.str(); globalBuf += ";\n"; @@ -5610,7 +5609,7 @@ Stmt *RewriteModernObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) { // Get the new text. std::string SStr; llvm::raw_string_ostream Buf(SStr); - Replacement->printPretty(Buf, *Context); + Replacement->printPretty(Buf); const std::string &Str = Buf.str(); printf("CAST = %s\n", &Str[0]); diff --git a/lib/Rewrite/RewriteObjC.cpp b/lib/Rewrite/RewriteObjC.cpp index 425cd77..37c17e6 100644 --- a/lib/Rewrite/RewriteObjC.cpp +++ b/lib/Rewrite/RewriteObjC.cpp @@ -227,7 +227,7 @@ namespace { // Get the new text. std::string SStr; llvm::raw_string_ostream S(SStr); - New->printPretty(S, *Context, 0, PrintingPolicy(LangOpts)); + New->printPretty(S, 0, PrintingPolicy(LangOpts)); const std::string &Str = S.str(); // If replacement succeeded or warning disabled return with no warning. @@ -1720,8 +1720,7 @@ Stmt *RewriteObjC::RewriteObjCSynchronizedStmt(ObjCAtSynchronizedStmt *S) { CK, syncExpr); std::string syncExprBufS; llvm::raw_string_ostream syncExprBuf(syncExprBufS); - syncExpr->printPretty(syncExprBuf, *Context, 0, - PrintingPolicy(LangOpts)); + syncExpr->printPretty(syncExprBuf, 0, PrintingPolicy(LangOpts)); syncBuf += syncExprBuf.str(); syncBuf += ");"; @@ -2553,8 +2552,7 @@ Stmt *RewriteObjC::RewriteObjCStringLiteral(ObjCStringLiteral *Exp) { // The pretty printer for StringLiteral handles escape characters properly. std::string prettyBufS; llvm::raw_string_ostream prettyBuf(prettyBufS); - Exp->getString()->printPretty(prettyBuf, *Context, 0, - PrintingPolicy(LangOpts)); + Exp->getString()->printPretty(prettyBuf, 0, PrintingPolicy(LangOpts)); Preamble += prettyBuf.str(); Preamble += ","; Preamble += utostr(Exp->getString()->getByteLength()) + "};\n"; @@ -4885,7 +4883,7 @@ Stmt *RewriteObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) { // Get the new text. 
std::string SStr; llvm::raw_string_ostream Buf(SStr); - Replacement->printPretty(Buf, *Context); + Replacement->printPretty(Buf); const std::string &Str = Buf.str(); printf("CAST = %s\n", &Str[0]); diff --git a/lib/Sema/AttributeList.cpp b/lib/Sema/AttributeList.cpp index 0f209fd..7c79879 100644 --- a/lib/Sema/AttributeList.cpp +++ b/lib/Sema/AttributeList.cpp @@ -21,6 +21,8 @@ using namespace clang; size_t AttributeList::allocated_size() const { if (IsAvailability) return AttributeFactory::AvailabilityAllocSize; + else if (IsTypeTagForDatatype) + return AttributeFactory::TypeTagForDatatypeAllocSize; return (sizeof(AttributeList) + NumArgs * sizeof(Expr*)); } diff --git a/lib/Sema/SemaCast.cpp b/lib/Sema/SemaCast.cpp index 8199751..d8d51e7 100644 --- a/lib/Sema/SemaCast.cpp +++ b/lib/Sema/SemaCast.cpp @@ -1477,6 +1477,21 @@ void Sema::CheckCompatibleReinterpretCast(QualType SrcType, QualType DestType, Diag(Range.getBegin(), DiagID) << SrcType << DestType << Range; } +static void DiagnoseCastOfObjCSEL(Sema &Self, const ExprResult &SrcExpr, + QualType DestType) { + QualType SrcType = SrcExpr.get()->getType(); + if (const PointerType *SrcPtrTy = SrcType->getAs()) + if (SrcPtrTy->isObjCSelType()) { + QualType DT = DestType; + if (isa(DestType)) + DT = DestType->getPointeeType(); + if (!DT.getUnqualifiedType()->isVoidType()) + Self.Diag(SrcExpr.get()->getExprLoc(), + diag::warn_cast_pointer_from_sel) + << SrcType << DestType << SrcExpr.get()->getSourceRange(); + } +} + static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, bool CStyle, const SourceRange &OpRange, @@ -1721,7 +1736,9 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, if (CStyle && DestType->isObjCObjectPointerType()) { return TC_Success; } - + if (CStyle) + DiagnoseCastOfObjCSEL(Self, SrcExpr, DestType); + // Not casting away constness, so the only remaining check is for compatible // pointer categories. @@ -2058,6 +2075,7 @@ void CastOperation::CheckCStyleCast() { return; } } + DiagnoseCastOfObjCSEL(Self, SrcExpr, DestType); Kind = Self.PrepareScalarCast(SrcExpr, DestType); if (SrcExpr.isInvalid()) diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 2594648..2559f00 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -513,6 +513,13 @@ void Sema::checkCall(NamedDecl *FDecl, Expr **Args, I = FDecl->specific_attr_begin(), E = FDecl->specific_attr_end(); I != E; ++I) CheckNonNullArguments(*I, Args, Loc); + + // Type safety checking. 
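
The loop below drives the new type-safety checks; a sketch of the intended caller-side usage (the tag kind "mpi" and all names are illustrative):

    typedef int Datatype;
    static const Datatype MY_INT
        __attribute__((type_tag_for_datatype(mpi, int))) = 1;

    void send_buf(void *buf, int count, Datatype d)
        __attribute__((pointer_with_type_tag(mpi, 1, 3)));

    void use(double *d) {
        send_buf(d, 1, MY_INT); // warns: tag says 'int', buffer is 'double *'
    }
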
+ for (specific_attr_iterator + i = FDecl->specific_attr_begin(), + e = FDecl->specific_attr_end(); i != e; ++i) { + CheckArgumentWithTypeTag(*i, Args); + } } /// CheckConstructorCall - Check a constructor call for correctness and safety @@ -3170,7 +3177,7 @@ void Sema::CheckStrlcpycatArguments(const CallExpr *Call, SmallString<128> sizeString; llvm::raw_svector_ostream OS(sizeString); OS << "sizeof("; - DstArg->printPretty(OS, Context, 0, getPrintingPolicy()); + DstArg->printPretty(OS, 0, getPrintingPolicy()); OS << ")"; Diag(OriginalSizeArg->getLocStart(), diag::note_strlcpycat_wrong_size) @@ -3267,10 +3274,10 @@ void Sema::CheckStrncatArguments(const CallExpr *CE, SmallString<128> sizeString; llvm::raw_svector_ostream OS(sizeString); OS << "sizeof("; - DstArg->printPretty(OS, Context, 0, getPrintingPolicy()); + DstArg->printPretty(OS, 0, getPrintingPolicy()); OS << ") - "; OS << "strlen("; - DstArg->printPretty(OS, Context, 0, getPrintingPolicy()); + DstArg->printPretty(OS, 0, getPrintingPolicy()); OS << ") - 1"; Diag(SL, diag::note_strncat_wrong_size) @@ -5468,3 +5475,410 @@ void Sema::DiagnoseEmptyLoopBody(const Stmt *S, Diag(NBody->getSemiLoc(), diag::note_empty_body_on_separate_line); } } + +//===--- Layout compatibility ----------------------------------------------// + +namespace { + +bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2); + +/// \brief Check if two enumeration types are layout-compatible. +bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) { + // C++11 [dcl.enum] p8: + // Two enumeration types are layout-compatible if they have the same + // underlying type. + return ED1->isComplete() && ED2->isComplete() && + C.hasSameType(ED1->getIntegerType(), ED2->getIntegerType()); +} + +/// \brief Check if two fields are layout-compatible. +bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1, FieldDecl *Field2) { + if (!isLayoutCompatible(C, Field1->getType(), Field2->getType())) + return false; + + if (Field1->isBitField() != Field2->isBitField()) + return false; + + if (Field1->isBitField()) { + // Make sure that the bit-fields are the same length. + unsigned Bits1 = Field1->getBitWidthValue(C); + unsigned Bits2 = Field2->getBitWidthValue(C); + + if (Bits1 != Bits2) + return false; + } + + return true; +} + +/// \brief Check if two standard-layout structs are layout-compatible. +/// (C++11 [class.mem] p17) +bool isLayoutCompatibleStruct(ASTContext &C, + RecordDecl *RD1, + RecordDecl *RD2) { + // If both records are C++ classes, check that base classes match. + if (const CXXRecordDecl *D1CXX = dyn_cast(RD1)) { + // If one of records is a CXXRecordDecl we are in C++ mode, + // thus the other one is a CXXRecordDecl, too. + const CXXRecordDecl *D2CXX = cast(RD2); + // Check number of base classes. + if (D1CXX->getNumBases() != D2CXX->getNumBases()) + return false; + + // Check the base classes. + for (CXXRecordDecl::base_class_const_iterator + Base1 = D1CXX->bases_begin(), + BaseEnd1 = D1CXX->bases_end(), + Base2 = D2CXX->bases_begin(); + Base1 != BaseEnd1; + ++Base1, ++Base2) { + if (!isLayoutCompatible(C, Base1->getType(), Base2->getType())) + return false; + } + } else if (const CXXRecordDecl *D2CXX = dyn_cast(RD2)) { + // If only RD2 is a C++ class, it should have zero base classes. + if (D2CXX->getNumBases() > 0) + return false; + } + + // Check the fields. 
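
A concrete pair the field walk below accepts under C++11 [class.mem]p17:

    // Same field count, corresponding fields layout-compatible; field
    // names are irrelevant.
    struct A { int id;  unsigned flags; };
    struct B { int key; unsigned bits;  };
    // struct C { long id; unsigned flags; }; // not layout-compatible with A
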
+ RecordDecl::field_iterator Field2 = RD2->field_begin(), + Field2End = RD2->field_end(), + Field1 = RD1->field_begin(), + Field1End = RD1->field_end(); + for ( ; Field1 != Field1End && Field2 != Field2End; ++Field1, ++Field2) { + if (!isLayoutCompatible(C, *Field1, *Field2)) + return false; + } + if (Field1 != Field1End || Field2 != Field2End) + return false; + + return true; +} + +/// \brief Check if two standard-layout unions are layout-compatible. +/// (C++11 [class.mem] p18) +bool isLayoutCompatibleUnion(ASTContext &C, + RecordDecl *RD1, + RecordDecl *RD2) { + llvm::SmallPtrSet UnmatchedFields; + for (RecordDecl::field_iterator Field2 = RD2->field_begin(), + Field2End = RD2->field_end(); + Field2 != Field2End; ++Field2) { + UnmatchedFields.insert(*Field2); + } + + for (RecordDecl::field_iterator Field1 = RD1->field_begin(), + Field1End = RD1->field_end(); + Field1 != Field1End; ++Field1) { + llvm::SmallPtrSet::iterator + I = UnmatchedFields.begin(), + E = UnmatchedFields.end(); + + for ( ; I != E; ++I) { + if (isLayoutCompatible(C, *Field1, *I)) { + bool Result = UnmatchedFields.erase(*I); + (void) Result; + assert(Result); + break; + } + } + if (I == E) + return false; + } + + return UnmatchedFields.empty(); +} + +bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1, RecordDecl *RD2) { + if (RD1->isUnion() != RD2->isUnion()) + return false; + + if (RD1->isUnion()) + return isLayoutCompatibleUnion(C, RD1, RD2); + else + return isLayoutCompatibleStruct(C, RD1, RD2); +} + +/// \brief Check if two types are layout-compatible in C++11 sense. +bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) { + if (T1.isNull() || T2.isNull()) + return false; + + // C++11 [basic.types] p11: + // If two types T1 and T2 are the same type, then T1 and T2 are + // layout-compatible types. + if (C.hasSameType(T1, T2)) + return true; + + T1 = T1.getCanonicalType().getUnqualifiedType(); + T2 = T2.getCanonicalType().getUnqualifiedType(); + + const Type::TypeClass TC1 = T1->getTypeClass(); + const Type::TypeClass TC2 = T2->getTypeClass(); + + if (TC1 != TC2) + return false; + + if (TC1 == Type::Enum) { + return isLayoutCompatible(C, + cast(T1)->getDecl(), + cast(T2)->getDecl()); + } else if (TC1 == Type::Record) { + if (!T1->isStandardLayoutType() || !T2->isStandardLayoutType()) + return false; + + return isLayoutCompatible(C, + cast(T1)->getDecl(), + cast(T2)->getDecl()); + } + + return false; +} +} + +//===--- CHECK: pointer_with_type_tag attribute: datatypes should match ----// + +namespace { +/// \brief Given a type tag expression find the type tag itself. +/// +/// \param TypeExpr Type tag expression, as it appears in user's code. +/// +/// \param VD Declaration of an identifier that appears in a type tag. +/// +/// \param MagicValue Type tag magic value. 
+bool FindTypeTagExpr(const Expr *TypeExpr, const ASTContext &Ctx, + const ValueDecl **VD, uint64_t *MagicValue) { + while(true) { + if (!TypeExpr) + return false; + + TypeExpr = TypeExpr->IgnoreParenImpCasts()->IgnoreParenCasts(); + + switch (TypeExpr->getStmtClass()) { + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UO = cast(TypeExpr); + if (UO->getOpcode() == UO_AddrOf || UO->getOpcode() == UO_Deref) { + TypeExpr = UO->getSubExpr(); + continue; + } + return false; + } + + case Stmt::DeclRefExprClass: { + const DeclRefExpr *DRE = cast(TypeExpr); + *VD = DRE->getDecl(); + return true; + } + + case Stmt::IntegerLiteralClass: { + const IntegerLiteral *IL = cast(TypeExpr); + llvm::APInt MagicValueAPInt = IL->getValue(); + if (MagicValueAPInt.getActiveBits() <= 64) { + *MagicValue = MagicValueAPInt.getZExtValue(); + return true; + } else + return false; + } + + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: { + const AbstractConditionalOperator *ACO = + cast(TypeExpr); + bool Result; + if (ACO->getCond()->EvaluateAsBooleanCondition(Result, Ctx)) { + if (Result) + TypeExpr = ACO->getTrueExpr(); + else + TypeExpr = ACO->getFalseExpr(); + continue; + } + return false; + } + + case Stmt::BinaryOperatorClass: { + const BinaryOperator *BO = cast(TypeExpr); + if (BO->getOpcode() == BO_Comma) { + TypeExpr = BO->getRHS(); + continue; + } + return false; + } + + default: + return false; + } + } +} + +/// \brief Retrieve the C type corresponding to type tag TypeExpr. +/// +/// \param TypeExpr Expression that specifies a type tag. +/// +/// \param MagicValues Registered magic values. +/// +/// \param FoundWrongKind Set to true if a type tag was found, but of a wrong +/// kind. +/// +/// \param TypeInfo Information about the corresponding C type. +/// +/// \returns true if the corresponding C type was found. +bool GetMatchingCType( + const IdentifierInfo *ArgumentKind, + const Expr *TypeExpr, const ASTContext &Ctx, + const llvm::DenseMap *MagicValues, + bool &FoundWrongKind, + Sema::TypeTagData &TypeInfo) { + FoundWrongKind = false; + + // Variable declaration that has type_tag_for_datatype attribute. 
+ const ValueDecl *VD = NULL; + + uint64_t MagicValue; + + if (!FindTypeTagExpr(TypeExpr, Ctx, &VD, &MagicValue)) + return false; + + if (VD) { + for (specific_attr_iterator + I = VD->specific_attr_begin(), + E = VD->specific_attr_end(); + I != E; ++I) { + if (I->getArgumentKind() != ArgumentKind) { + FoundWrongKind = true; + return false; + } + TypeInfo.Type = I->getMatchingCType(); + TypeInfo.LayoutCompatible = I->getLayoutCompatible(); + TypeInfo.MustBeNull = I->getMustBeNull(); + return true; + } + return false; + } + + if (!MagicValues) + return false; + + llvm::DenseMap::const_iterator I = + MagicValues->find(std::make_pair(ArgumentKind, MagicValue)); + if (I == MagicValues->end()) + return false; + + TypeInfo = I->second; + return true; +} +} // unnamed namespace + +void Sema::RegisterTypeTagForDatatype(const IdentifierInfo *ArgumentKind, + uint64_t MagicValue, QualType Type, + bool LayoutCompatible, + bool MustBeNull) { + if (!TypeTagForDatatypeMagicValues) + TypeTagForDatatypeMagicValues.reset( + new llvm::DenseMap); + + TypeTagMagicValue Magic(ArgumentKind, MagicValue); + (*TypeTagForDatatypeMagicValues)[Magic] = + TypeTagData(Type, LayoutCompatible, MustBeNull); +} + +namespace { +bool IsSameCharType(QualType T1, QualType T2) { + const BuiltinType *BT1 = T1->getAs(); + if (!BT1) + return false; + + const BuiltinType *BT2 = T2->getAs(); + if (!BT2) + return false; + + BuiltinType::Kind T1Kind = BT1->getKind(); + BuiltinType::Kind T2Kind = BT2->getKind(); + + return (T1Kind == BuiltinType::SChar && T2Kind == BuiltinType::Char_S) || + (T1Kind == BuiltinType::UChar && T2Kind == BuiltinType::Char_U) || + (T1Kind == BuiltinType::Char_U && T2Kind == BuiltinType::UChar) || + (T1Kind == BuiltinType::Char_S && T2Kind == BuiltinType::SChar); +} +} // unnamed namespace + +void Sema::CheckArgumentWithTypeTag(const ArgumentWithTypeTagAttr *Attr, + const Expr * const *ExprArgs) { + const IdentifierInfo *ArgumentKind = Attr->getArgumentKind(); + bool IsPointerAttr = Attr->getIsPointer(); + + const Expr *TypeTagExpr = ExprArgs[Attr->getTypeTagIdx()]; + bool FoundWrongKind; + TypeTagData TypeInfo; + if (!GetMatchingCType(ArgumentKind, TypeTagExpr, Context, + TypeTagForDatatypeMagicValues.get(), + FoundWrongKind, TypeInfo)) { + if (FoundWrongKind) + Diag(TypeTagExpr->getExprLoc(), + diag::warn_type_tag_for_datatype_wrong_kind) + << TypeTagExpr->getSourceRange(); + return; + } + + const Expr *ArgumentExpr = ExprArgs[Attr->getArgumentIdx()]; + if (IsPointerAttr) { + // Skip implicit cast of pointer to `void *' (as a function argument). + if (const ImplicitCastExpr *ICE = dyn_cast(ArgumentExpr)) + if (ICE->getType()->isVoidPointerType()) + ArgumentExpr = ICE->getSubExpr(); + } + QualType ArgumentType = ArgumentExpr->getType(); + + // Passing a `void*' pointer shouldn't trigger a warning. + if (IsPointerAttr && ArgumentType->isVoidPointerType()) + return; + + if (TypeInfo.MustBeNull) { + // Type tag with matching void type requires a null pointer. 
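
A sketch of the case being diagnosed (the tag kind "k" and names are illustrative):

    static const int NULL_TAG
        __attribute__((type_tag_for_datatype(k, void, must_be_null))) = 0;

    void take(void *p, int tag)
        __attribute__((pointer_with_type_tag(k, 1, 2)));

    void use_null(int *x) {
        take(0, NULL_TAG); // ok: a null pointer is required and provided
        take(x, NULL_TAG); // warns: non-null argument, must_be_null tag
    }
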
+ if (!ArgumentExpr->isNullPointerConstant(Context, + Expr::NPC_ValueDependentIsNotNull)) { + Diag(ArgumentExpr->getExprLoc(), + diag::warn_type_safety_null_pointer_required) + << ArgumentKind->getName() + << ArgumentExpr->getSourceRange() + << TypeTagExpr->getSourceRange(); + } + return; + } + + QualType RequiredType = TypeInfo.Type; + if (IsPointerAttr) + RequiredType = Context.getPointerType(RequiredType); + + bool mismatch = false; + if (!TypeInfo.LayoutCompatible) { + mismatch = !Context.hasSameType(ArgumentType, RequiredType); + + // C++11 [basic.fundamental] p1: + // Plain char, signed char, and unsigned char are three distinct types. + // + // But we treat plain `char' as equivalent to `signed char' or `unsigned + // char' depending on the current char signedness mode. + if (mismatch) + if ((IsPointerAttr && IsSameCharType(ArgumentType->getPointeeType(), + RequiredType->getPointeeType())) || + (!IsPointerAttr && IsSameCharType(ArgumentType, RequiredType))) + mismatch = false; + } else + if (IsPointerAttr) + mismatch = !isLayoutCompatible(Context, + ArgumentType->getPointeeType(), + RequiredType->getPointeeType()); + else + mismatch = !isLayoutCompatible(Context, ArgumentType, RequiredType); + + if (mismatch) + Diag(ArgumentExpr->getExprLoc(), diag::warn_type_safety_type_mismatch) + << ArgumentType << ArgumentKind->getName() + << TypeInfo.LayoutCompatible << RequiredType + << ArgumentExpr->getSourceRange() + << TypeTagExpr->getSourceRange(); +} + diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp index 9fa757d..adf1327 100644 --- a/lib/Sema/SemaCodeComplete.cpp +++ b/lib/Sema/SemaCodeComplete.cpp @@ -4476,7 +4476,6 @@ static void AddObjCExpressionResults(ResultBuilder &Results, bool NeedAt) { Builder.AddResultTypeChunk("NSDictionary *"); Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"{")); Builder.AddPlaceholderChunk("key"); - Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddChunk(CodeCompletionString::CK_Colon); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("object, ..."); diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp index 3aae99a..ea181de 100644 --- a/lib/Sema/SemaDecl.cpp +++ b/lib/Sema/SemaDecl.cpp @@ -6174,6 +6174,7 @@ namespace { Decl *OrigDecl; bool isRecordType; bool isPODType; + bool isReferenceType; public: typedef EvaluatedExprVisitor Inherited; @@ -6182,9 +6183,11 @@ namespace { S(S), OrigDecl(OrigDecl) { isPODType = false; isRecordType = false; + isReferenceType = false; if (ValueDecl *VD = dyn_cast(OrigDecl)) { isPODType = VD->getType().isPODType(S.Context); isRecordType = VD->getType()->isRecordType(); + isReferenceType = VD->getType()->isReferenceType(); } } @@ -6192,9 +6195,9 @@ namespace { // to determine which DeclRefExpr's to check. Assume that the casts // are present and continue visiting the expression. void HandleExpr(Expr *E) { - // Skip checking T a = a where T is not a record type. Doing so is a - // way to silence uninitialized warnings. - if (isRecordType) + // Skip checking T a = a where T is not a record or reference type. + // Doing so is a way to silence uninitialized warnings. + if (isRecordType || isReferenceType) if (DeclRefExpr *DRE = dyn_cast(E)) HandleDeclRefExpr(DRE); @@ -6309,11 +6312,11 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, } // Check for self-references within variable initializers. - // Variables declared within a function/method body are handled - // by a dataflow analysis. 
+ // Variables declared within a function/method body (except for references) + // are handled by a dataflow analysis. // Record types initialized by initializer list are handled here. // Initialization by constructors are handled in TryConstructorInitialization. - if (!VDecl->hasLocalStorage() && !VDecl->isStaticLocal() && + if ((!VDecl->hasLocalStorage() || VDecl->getType()->isReferenceType()) && (isa(Init) || !VDecl->getType()->isRecordType())) CheckSelfReference(RealDecl, Init); @@ -6754,6 +6757,10 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl, diag::err_abstract_type_in_decl, AbstractVariableType)) Var->setInvalidDecl(); + if (!Type->isDependentType() && !Var->isInvalidDecl() && + Var->getStorageClass() == SC_PrivateExtern) + Diag(Var->getLocation(), diag::warn_private_extern); + return; case VarDecl::TentativeDefinition: @@ -7027,6 +7034,42 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) { // Note that we are no longer parsing the initializer for this declaration. ParsingInitForAutoVars.erase(ThisDecl); + + // Now we have parsed the initializer and can update the table of magic + // tag values. + if (ThisDecl && ThisDecl->hasAttr()) { + const VarDecl *VD = dyn_cast(ThisDecl); + if (VD && VD->getType()->isIntegralOrEnumerationType()) { + for (specific_attr_iterator + I = ThisDecl->specific_attr_begin(), + E = ThisDecl->specific_attr_end(); + I != E; ++I) { + const Expr *MagicValueExpr = VD->getInit(); + if (!MagicValueExpr) { + continue; + } + llvm::APSInt MagicValueInt; + if (!MagicValueExpr->isIntegerConstantExpr(MagicValueInt, Context)) { + Diag(I->getRange().getBegin(), + diag::err_type_tag_for_datatype_not_ice) + << LangOpts.CPlusPlus << MagicValueExpr->getSourceRange(); + continue; + } + if (MagicValueInt.getActiveBits() > 64) { + Diag(I->getRange().getBegin(), + diag::err_type_tag_for_datatype_too_large) + << LangOpts.CPlusPlus << MagicValueExpr->getSourceRange(); + continue; + } + uint64_t MagicValue = MagicValueInt.getZExtValue(); + RegisterTypeTagForDatatype(I->getArgumentKind(), + MagicValue, + I->getMatchingCType(), + I->getLayoutCompatible(), + I->getMustBeNull()); + } + } + } } Sema::DeclGroupPtrTy @@ -7623,7 +7666,9 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D) { << FD->getName() << "dllimport"; } } - ActOnDocumentableDecl(FD); + // We want to attach documentation to original Decl (which might be + // a function template). + ActOnDocumentableDecl(D); return FD; } @@ -7750,7 +7795,8 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, // Verify that gotos and switch cases don't jump into scopes illegally. 
if (getCurFunction()->NeedsScopeChecking() && !dcl->isInvalidDecl() && - !hasAnyUnrecoverableErrorsInThisFunction()) + !hasAnyUnrecoverableErrorsInThisFunction() && + !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(Body); if (CXXDestructorDecl *Destructor = dyn_cast(dcl)) { @@ -9907,6 +9953,13 @@ void Sema::ActOnFields(Scope* S, } } } + if (isa(EnclosingDecl) && + RequireNonAbstractType(FD->getLocation(), FD->getType(), + diag::err_abstract_type_in_decl, + AbstractIvarType)) { + // Ivars can not have abstract class types + FD->setInvalidDecl(); + } if (Record && FDTTy->getDecl()->hasObjectMember()) Record->setHasObjectMember(true); } else if (FDTy->isObjCObjectType()) { @@ -9915,8 +9968,7 @@ void Sema::ActOnFields(Scope* S, << FixItHint::CreateInsertion(FD->getLocation(), "*"); QualType T = Context.getObjCObjectPointerType(FD->getType()); FD->setType(T); - } - else if (!getLangOpts().CPlusPlus) { + } else if (!getLangOpts().CPlusPlus) { if (getLangOpts().ObjCAutoRefCount && Record && !ARCErrReported) { // It's an error in ARC if a field has lifetime. // We don't want to report this in a system header, though, diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp index 22bff86..caa7b2f 100644 --- a/lib/Sema/SemaDeclAttr.cpp +++ b/lib/Sema/SemaDeclAttr.cpp @@ -221,6 +221,53 @@ static bool checkAttributeAtLeastNumArgs(Sema &S, const AttributeList &Attr, return true; } +/// \brief Check if IdxExpr is a valid argument index for a function or +/// instance method D. May output an error. +/// +/// \returns true if IdxExpr is a valid index. +static bool checkFunctionOrMethodArgumentIndex(Sema &S, const Decl *D, + StringRef AttrName, + SourceLocation AttrLoc, + unsigned AttrArgNum, + const Expr *IdxExpr, + uint64_t &Idx) +{ + assert(isFunctionOrMethod(D) && hasFunctionProto(D)); + + // In C++ the implicit 'this' function parameter also counts. + // Parameters are counted from one. + const bool HasImplicitThisParam = isInstanceMethod(D); + const unsigned NumArgs = getFunctionOrMethodNumArgs(D) + HasImplicitThisParam; + const unsigned FirstIdx = 1; + + llvm::APSInt IdxInt; + if (IdxExpr->isTypeDependent() || IdxExpr->isValueDependent() || + !IdxExpr->isIntegerConstantExpr(IdxInt, S.Context)) { + S.Diag(AttrLoc, diag::err_attribute_argument_n_not_int) + << AttrName << AttrArgNum << IdxExpr->getSourceRange(); + return false; + } + + Idx = IdxInt.getLimitedValue(); + if (Idx < FirstIdx || (!isFunctionOrMethodVariadic(D) && Idx > NumArgs)) { + S.Diag(AttrLoc, diag::err_attribute_argument_out_of_bounds) + << AttrName << AttrArgNum << IdxExpr->getSourceRange(); + return false; + } + Idx--; // Convert to zero-based. 
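
The adjustment below exists because an instance method's implicit this counts as argument 1; for example:

    struct X {
        // 'this' is argument 1, so the buffer is index 2 and the tag 3;
        // writing 1 here would be diagnosed as naming the implicit this.
        void put(void *p, int tag)
            __attribute__((pointer_with_type_tag(k, 2, 3)));
    };
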
+ if (HasImplicitThisParam) { + if (Idx == 0) { + S.Diag(AttrLoc, + diag::err_attribute_invalid_implicit_this_argument) + << AttrName << IdxExpr->getSourceRange(); + return false; + } + --Idx; + } + + return true; +} + /// /// \brief Check if passed in Decl is a field or potentially shared global var /// \return true if the Decl is a field or potentially shared global variable @@ -3523,25 +3570,16 @@ static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) { D->addAttr(::new (S.Context) PascalAttr(Attr.getRange(), S.Context)); return; case AttributeList::AT_Pcs: { - Expr *Arg = Attr.getArg(0); - StringLiteral *Str = dyn_cast(Arg); - if (!Str || !Str->isAscii()) { - S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string) - << "pcs" << 1; - Attr.setInvalid(); - return; - } - - StringRef StrRef = Str->getString(); PcsAttr::PCSType PCS; - if (StrRef == "aapcs") + switch (CC) { + case CC_AAPCS: PCS = PcsAttr::AAPCS; - else if (StrRef == "aapcs-vfp") + break; + case CC_AAPCS_VFP: PCS = PcsAttr::AAPCS_VFP; - else { - S.Diag(Attr.getLoc(), diag::err_invalid_pcs); - Attr.setInvalid(); - return; + break; + default: + llvm_unreachable("unexpected calling convention in pcs attribute"); } D->addAttr(::new (S.Context) PcsAttr(Attr.getRange(), S.Context, PCS)); @@ -3560,10 +3598,9 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC) { if (attr.isInvalid()) return true; - if ((attr.getNumArgs() != 0 && - !(attr.getKind() == AttributeList::AT_Pcs && attr.getNumArgs() == 1)) || - attr.getParameterName()) { - Diag(attr.getLoc(), diag::err_attribute_wrong_number_arguments) << 0; + unsigned ReqArgs = attr.getKind() == AttributeList::AT_Pcs ? 1 : 0; + if (attr.getNumArgs() != ReqArgs || attr.getParameterName()) { + Diag(attr.getLoc(), diag::err_attribute_wrong_number_arguments) << ReqArgs; attr.setInvalid(); return true; } @@ -3594,7 +3631,10 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC) { CC = CC_AAPCS_VFP; break; } - // FALLS THROUGH + + attr.setInvalid(); + Diag(attr.getLoc(), diag::err_invalid_pcs); + return true; } default: llvm_unreachable("unexpected attribute kind"); } @@ -3703,6 +3743,79 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const AttributeList &Attr){ } } +static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + StringRef AttrName = Attr.getName()->getName(); + if (!Attr.getParameterName()) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_identifier) + << Attr.getName() << /* arg num = */ 1; + return; + } + + if (Attr.getNumArgs() != 2) { + S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) + << /* required args = */ 3; + return; + } + + IdentifierInfo *ArgumentKind = Attr.getParameterName(); + + if (!isFunctionOrMethod(D) || !hasFunctionProto(D)) { + S.Diag(Attr.getLoc(), diag::err_attribute_wrong_decl_type) + << Attr.getName() << ExpectedFunctionOrMethod; + return; + } + + uint64_t ArgumentIdx; + if (!checkFunctionOrMethodArgumentIndex(S, D, AttrName, + Attr.getLoc(), 2, + Attr.getArg(0), ArgumentIdx)) + return; + + uint64_t TypeTagIdx; + if (!checkFunctionOrMethodArgumentIndex(S, D, AttrName, + Attr.getLoc(), 3, + Attr.getArg(1), TypeTagIdx)) + return; + + bool IsPointer = (AttrName == "pointer_with_type_tag"); + if (IsPointer) { + // Ensure that buffer has a pointer type. 
+ QualType BufferTy = getFunctionOrMethodArgType(D, ArgumentIdx); + if (!BufferTy->isPointerType()) { + S.Diag(Attr.getLoc(), diag::err_attribute_pointers_only) + << AttrName; + } + } + + D->addAttr(::new (S.Context) ArgumentWithTypeTagAttr(Attr.getRange(), + S.Context, + ArgumentKind, + ArgumentIdx, + TypeTagIdx, + IsPointer)); +} + +static void handleTypeTagForDatatypeAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + IdentifierInfo *PointerKind = Attr.getParameterName(); + if (!PointerKind) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_identifier) + << "type_tag_for_datatype" << 1; + return; + } + + QualType MatchingCType = S.GetTypeFromParser(Attr.getMatchingCType(), NULL); + + D->addAttr(::new (S.Context) TypeTagForDatatypeAttr( + Attr.getRange(), + S.Context, + PointerKind, + MatchingCType, + Attr.getLayoutCompatible(), + Attr.getMustBeNull())); +} + //===----------------------------------------------------------------------===// // Checker-specific attribute handlers. //===----------------------------------------------------------------------===// @@ -4333,6 +4446,14 @@ static void ProcessInheritableDeclAttr(Sema &S, Scope *scope, Decl *D, handleAcquiredAfterAttr(S, D, Attr); break; + // Type safety attributes. + case AttributeList::AT_ArgumentWithTypeTag: + handleArgumentWithTypeTagAttr(S, D, Attr); + break; + case AttributeList::AT_TypeTagForDatatype: + handleTypeTagForDatatypeAttr(S, D, Attr); + break; + default: // Ask target about the attribute. const TargetAttributesSema &TargetAttrs = S.getTargetAttributesSema(); diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index 1d45a68..eeac9b8 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -9807,7 +9807,7 @@ Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc, if (!Failed && !Cond) { llvm::SmallString<256> MsgBuffer; llvm::raw_svector_ostream Msg(MsgBuffer); - AssertMessage->printPretty(Msg, Context, 0, getPrintingPolicy()); + AssertMessage->printPretty(Msg, 0, getPrintingPolicy()); Diag(StaticAssertLoc, diag::err_static_assert_failed) << Msg.str() << AssertExpr->getSourceRange(); Failed = true; diff --git a/lib/Sema/SemaExceptionSpec.cpp b/lib/Sema/SemaExceptionSpec.cpp index 63bfa9d..e6266fb 100644 --- a/lib/Sema/SemaExceptionSpec.cpp +++ b/lib/Sema/SemaExceptionSpec.cpp @@ -242,8 +242,7 @@ bool Sema::CheckEquivalentExceptionSpec(FunctionDecl *Old, FunctionDecl *New) { case EST_ComputedNoexcept: OS << "noexcept("; - OldProto->getNoexceptExpr()->printPretty(OS, Context, 0, - getPrintingPolicy()); + OldProto->getNoexceptExpr()->printPretty(OS, 0, getPrintingPolicy()); OS << ")"; break; diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 6a503ee..3875ba1 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -9461,7 +9461,8 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, // If needed, diagnose invalid gotos and switches in the block. if (getCurFunction()->NeedsScopeChecking() && - !hasAnyUnrecoverableErrorsInThisFunction()) + !hasAnyUnrecoverableErrorsInThisFunction() && + !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(cast(Body)); BSI->TheDecl->setBody(cast(Body)); diff --git a/lib/Sema/SemaExprMember.cpp b/lib/Sema/SemaExprMember.cpp index 53f22f6..8f445e2 100644 --- a/lib/Sema/SemaExprMember.cpp +++ b/lib/Sema/SemaExprMember.cpp @@ -1137,7 +1137,7 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, goto fail; // There's an implicit 'isa' ivar on all objects. 
// But we only actually find it this way on objects of type 'id', - // apparently.ghjg + // apparently. if (OTy->isObjCId() && Member->isStr("isa")) { Diag(MemberLoc, diag::warn_objc_isa_use); return Owned(new (Context) ObjCIsaExpr(BaseExpr.take(), IsArrow, MemberLoc, diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp index a874489..9382f7d 100644 --- a/lib/Sema/SemaOverload.cpp +++ b/lib/Sema/SemaOverload.cpp @@ -57,7 +57,7 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, StandardConversionSequence &SCS, bool CStyle, bool AllowObjCWritebackConversion); - + static bool IsTransparentUnionStandardConversion(Sema &S, Expr* From, QualType &ToType, bool InOverloadResolution, diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp index d22deb2..86884b7 100644 --- a/lib/Sema/SemaStmt.cpp +++ b/lib/Sema/SemaStmt.cpp @@ -28,6 +28,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -35,11 +36,15 @@ #include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" @@ -1516,7 +1521,6 @@ Sema::CheckObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) { StmtResult Sema::ActOnObjCForCollectionStmt(SourceLocation ForLoc, - SourceLocation LParenLoc, Stmt *First, Expr *collection, SourceLocation RParenLoc) { @@ -1698,9 +1702,9 @@ static bool ObjCEnumerationCollection(Expr *Collection) { && Collection->getType()->getAs() != 0; } -/// ActOnCXXForRangeStmt - Check and build a C++0x for-range statement. +/// ActOnCXXForRangeStmt - Check and build a C++11 for-range statement. /// -/// C++0x [stmt.ranged]: +/// C++11 [stmt.ranged]: /// A range-based for statement is equivalent to /// /// { @@ -1717,15 +1721,14 @@ static bool ObjCEnumerationCollection(Expr *Collection) { /// The body of the loop is not available yet, since it cannot be analysed until /// we have determined the type of the for-range-declaration. StmtResult -Sema::ActOnCXXForRangeStmt(SourceLocation ForLoc, SourceLocation LParenLoc, +Sema::ActOnCXXForRangeStmt(SourceLocation ForLoc, Stmt *First, SourceLocation ColonLoc, Expr *Range, SourceLocation RParenLoc) { if (!First || !Range) return StmtError(); if (ObjCEnumerationCollection(Range)) - return ActOnObjCForCollectionStmt(ForLoc, LParenLoc, First, Range, - RParenLoc); + return ActOnObjCForCollectionStmt(ForLoc, First, Range, RParenLoc); DeclStmt *DS = dyn_cast(First); assert(DS && "first part of for range not a decl stmt"); @@ -2759,165 +2762,225 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple, return Owned(NS); } -// needSpaceAsmToken - This function handles whitespace around asm punctuation. -// Returns true if a space should be emitted. -static inline bool needSpaceAsmToken(Token currTok) { - static Token prevTok; - - // No need for space after prevToken. 
- switch(prevTok.getKind()) { - default: - break; - case tok::l_square: - case tok::r_square: - case tok::l_brace: - case tok::r_brace: - case tok::colon: - prevTok = currTok; - return false; - } +// isMSAsmKeyword - Return true if this is an MS-style inline asm keyword. These +// require special handling. +static bool isMSAsmKeyword(StringRef Name) { + bool Ret = llvm::StringSwitch(Name) + .Cases("EVEN", "ALIGN", true) // Alignment directives. + .Cases("LENGTH", "SIZE", "TYPE", true) // Type and variable sizes. + .Case("_emit", true) // _emit Pseudoinstruction. + .Default(false); + return Ret; +} - // No need for a space before currToken. - switch(currTok.getKind()) { - default: - break; - case tok::l_square: - case tok::r_square: - case tok::l_brace: - case tok::r_brace: - case tok::comma: - case tok::colon: - prevTok = currTok; - return false; - } - prevTok = currTok; - return true; +static StringRef getSpelling(Sema &SemaRef, Token AsmTok) { + StringRef Asm; + SmallString<512> TokenBuf; + TokenBuf.resize(512); + bool StringInvalid = false; + Asm = SemaRef.PP.getSpelling(AsmTok, TokenBuf, &StringInvalid); + assert (!StringInvalid && "Expected valid string!"); + return Asm; } static void patchMSAsmStrings(Sema &SemaRef, bool &IsSimple, SourceLocation AsmLoc, ArrayRef AsmToks, - ArrayRef LineEnds, const TargetInfo &TI, + std::vector &AsmRegs, + std::vector &AsmNames, std::vector &AsmStrings) { assert (!AsmToks.empty() && "Didn't expect an empty AsmToks!"); - // Assume simple asm stmt until we parse a non-register identifer. + // Assume simple asm stmt until we parse a non-register identifer (or we just + // need to bail gracefully). IsSimple = true; - for (unsigned i = 0, e = LineEnds.size(); i != e; ++i) { - SmallString<512> Asm; + SmallString<512> Asm; + unsigned NumAsmStrings = 0; + for (unsigned i = 0, e = AsmToks.size(); i != e; ++i) { + + // Determine if this should be considered a new asm. + bool isNewAsm = i == 0 || AsmToks[i].isAtStartOfLine() || + AsmToks[i].is(tok::kw_asm); + + // Emit the previous asm string. + if (i && isNewAsm) { + AsmStrings[NumAsmStrings++] = Asm.c_str(); + if (AsmToks[i].is(tok::kw_asm)) { + ++i; // Skip __asm + assert (i != e && "Expected another token."); + } + } - // Check the operands. - for (unsigned j = (i == 0) ? 0 : LineEnds[i-1], e = LineEnds[i]; j != e; ++j) { + // Start a new asm string with the opcode. + if (isNewAsm) { + AsmRegs[NumAsmStrings].resize(AsmToks.size()); + AsmNames[NumAsmStrings].resize(AsmToks.size()); - IdentifierInfo *II; - if (j == 0 || (i > 0 && j == LineEnds[i-1])) { - II = AsmToks[j].getIdentifierInfo(); - Asm = II->getName().str(); - continue; + StringRef Piece = AsmToks[i].getIdentifierInfo()->getName(); + // MS-style inline asm keywords require special handling. + if (isMSAsmKeyword(Piece)) + IsSimple = false; + + // TODO: Verify this is a valid opcode. + Asm = Piece; + continue; + } + + if (i && AsmToks[i].hasLeadingSpace()) + Asm += ' '; + + // Check the operand(s). 
+ switch (AsmToks[i].getKind()) { + default: + IsSimple = false; + Asm += getSpelling(SemaRef, AsmToks[i]); + break; + case tok::comma: Asm += ","; break; + case tok::colon: Asm += ":"; break; + case tok::l_square: Asm += "["; break; + case tok::r_square: Asm += "]"; break; + case tok::l_brace: Asm += "{"; break; + case tok::r_brace: Asm += "}"; break; + case tok::numeric_constant: + Asm += getSpelling(SemaRef, AsmToks[i]); + break; + case tok::identifier: { + IdentifierInfo *II = AsmToks[i].getIdentifierInfo(); + StringRef Name = II->getName(); + + // Valid register? + if (TI.isValidGCCRegisterName(Name)) { + AsmRegs[NumAsmStrings].set(i); + Asm += Name; + break; } - if (needSpaceAsmToken(AsmToks[j])) - Asm += " "; + IsSimple = false; - switch (AsmToks[j].getKind()) { - default: - //llvm_unreachable("Unknown token."); + // MS-style inline asm keywords require special handling. + if (isMSAsmKeyword(Name)) { + IsSimple = false; + Asm += Name; break; - case tok::comma: Asm += ","; break; - case tok::colon: Asm += ":"; break; - case tok::l_square: Asm += "["; break; - case tok::r_square: Asm += "]"; break; - case tok::l_brace: Asm += "{"; break; - case tok::r_brace: Asm += "}"; break; - case tok::numeric_constant: { - SmallString<32> TokenBuf; - TokenBuf.resize(32); - bool StringInvalid = false; - Asm += SemaRef.PP.getSpelling(AsmToks[j], TokenBuf, &StringInvalid); - assert (!StringInvalid && "Expected valid string!"); + } + + // FIXME: Why are we missing this segment register? + if (Name == "fs") { + Asm += Name; break; } - case tok::identifier: { - II = AsmToks[j].getIdentifierInfo(); - StringRef Name = II->getName(); - // Valid registers don't need modification. - if (TI.isValidGCCRegisterName(Name)) { - Asm += Name; - break; - } + // Lookup the identifier. + // TODO: Someone with more experience with clang should verify this the + // proper way of doing a symbol lookup. + DeclarationName DeclName(II); + Scope *CurScope = SemaRef.getCurScope(); + LookupResult R(SemaRef, DeclName, AsmLoc, Sema::LookupOrdinaryName); + if (!SemaRef.LookupName(R, CurScope, false/*AllowBuiltinCreation*/)) + break; - // TODO: Lookup the identifier. - IsSimple = false; + assert (R.isSingleResult() && "Expected a single result?!"); + NamedDecl *Decl = R.getFoundDecl(); + switch (Decl->getKind()) { + default: + assert(0 && "Unknown decl kind."); + break; + case Decl::Var: { + case Decl::ParmVar: + AsmNames[NumAsmStrings].set(i); + + VarDecl *Var = cast(Decl); + QualType Ty = Var->getType(); + (void)Ty; // Avoid warning. + // TODO: Patch identifier with valid operand. One potential idea is to + // probe the backend with type information to guess the possible + // operand. + break; + } } - } // AsmToks[i].getKind() + break; + } } - AsmStrings[i] = Asm.c_str(); } + + // Emit the final (and possibly only) asm string. + AsmStrings[NumAsmStrings] = Asm.c_str(); } // Build the unmodified MSAsmString. 
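
Adjacent single-line asm statements form a single MSAsmStmt, and the rebuilt string separates them with '\n'; a sketch (MSVC syntax, requires MS extensions):

    void g() {
        __asm mov eax, 1
        __asm mov ebx, eax
        // buildMSAsmString rebuilds this as "mov eax, 1\nmov ebx, eax".
    }
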
static std::string buildMSAsmString(Sema &SemaRef, ArrayRef AsmToks, - ArrayRef LineEnds) { + unsigned &NumAsmStrings) { assert (!AsmToks.empty() && "Didn't expect an empty AsmToks!"); + NumAsmStrings = 0; + SmallString<512> Asm; - SmallString<512> TokenBuf; - TokenBuf.resize(512); - unsigned AsmLineNum = 0; for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) { - const char *ThisTokBuf = &TokenBuf[0]; - bool StringInvalid = false; - unsigned ThisTokLen = - Lexer::getSpelling(AsmToks[i], ThisTokBuf, SemaRef.getSourceManager(), - SemaRef.getLangOpts(), &StringInvalid); - if (i && (!AsmLineNum || i != LineEnds[AsmLineNum-1]) && - needSpaceAsmToken(AsmToks[i])) - Asm += ' '; - Asm += StringRef(ThisTokBuf, ThisTokLen); - if (i + 1 == LineEnds[AsmLineNum] && i + 1 != AsmToks.size()) { - Asm += '\n'; - ++AsmLineNum; + bool isNewAsm = i == 0 || AsmToks[i].isAtStartOfLine() || + AsmToks[i].is(tok::kw_asm); + + if (isNewAsm) { + ++NumAsmStrings; + if (i) + Asm += '\n'; + if (AsmToks[i].is(tok::kw_asm)) { + i++; // Skip __asm + assert (i != e && "Expected another token"); + } } + + if (i && AsmToks[i].hasLeadingSpace() && !isNewAsm) + Asm += ' '; + + Asm += getSpelling(SemaRef, AsmToks[i]); } return Asm.c_str(); } StmtResult Sema::ActOnMSAsmStmt(SourceLocation AsmLoc, + SourceLocation LBraceLoc, ArrayRef AsmToks, - ArrayRef LineEnds, SourceLocation EndLoc) { // MS-style inline assembly is not fully supported, so emit a warning. Diag(AsmLoc, diag::warn_unsupported_msasm); SmallVector Clobbers; + std::set ClobberRegs; + SmallVector Inputs; + SmallVector Outputs; // Empty asm statements don't need to instantiate the AsmParser, etc. if (AsmToks.empty()) { StringRef AsmString; MSAsmStmt *NS = - new (Context) MSAsmStmt(Context, AsmLoc, /* IsSimple */ true, - /* IsVolatile */ true, AsmToks, LineEnds, + new (Context) MSAsmStmt(Context, AsmLoc, LBraceLoc, /*IsSimple*/ true, + /*IsVolatile*/ true, AsmToks, Inputs, Outputs, AsmString, Clobbers, EndLoc); return Owned(NS); } - std::string AsmString = buildMSAsmString(*this, AsmToks, LineEnds); + unsigned NumAsmStrings; + std::string AsmString = buildMSAsmString(*this, AsmToks, NumAsmStrings); bool IsSimple; + std::vector Regs; + std::vector Names; std::vector PatchedAsmStrings; - PatchedAsmStrings.resize(LineEnds.size()); + + Regs.resize(NumAsmStrings); + Names.resize(NumAsmStrings); + PatchedAsmStrings.resize(NumAsmStrings); // Rewrite operands to appease the AsmParser. - patchMSAsmStrings(*this, IsSimple, AsmLoc, AsmToks, LineEnds, - Context.getTargetInfo(), PatchedAsmStrings); + patchMSAsmStrings(*this, IsSimple, AsmLoc, AsmToks, + Context.getTargetInfo(), Regs, Names, PatchedAsmStrings); // patchMSAsmStrings doesn't correctly patch non-simple asm statements. if (!IsSimple) { MSAsmStmt *NS = - new (Context) MSAsmStmt(Context, AsmLoc, /* IsSimple */ true, - /* IsVolatile */ true, AsmToks, LineEnds, + new (Context) MSAsmStmt(Context, AsmLoc, LBraceLoc, /*IsSimple*/ true, + /*IsVolatile*/ true, AsmToks, Inputs, Outputs, AsmString, Clobbers, EndLoc); return Owned(NS); } @@ -2947,7 +3010,7 @@ StmtResult Sema::ActOnMSAsmStmt(SourceLocation AsmLoc, // Tell SrcMgr about this buffer, which is what the parser will pick up. 
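
Further below, every register the matched instruction defines is turned into a clobber; a self-contained sketch of how such a set renders in LLVM constraint syntax (names illustrative):

    #include <set>
    #include <string>

    // Every register the matched MCInst defines becomes a clobber; the
    // set is then rendered as LLVM inline-asm constraints.
    static std::string renderClobbers(const std::set<std::string> &Regs) {
        std::string Constraints;
        for (std::set<std::string>::const_iterator I = Regs.begin(),
             E = Regs.end(); I != E; ++I) {
            if (!Constraints.empty())
                Constraints += ',';
            Constraints += "~{" + *I + "}";
        }
        return Constraints; // e.g. "~{eax},~{edx}"
    }
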
   SrcMgr.AddNewSourceBuffer(Buffer, llvm::SMLoc());
 
-  OwningPtr<MCStreamer> Str;
+  OwningPtr<MCStreamer> Str(createNullStreamer(Ctx));
   OwningPtr<MCAsmParser>
     Parser(createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI));
   OwningPtr<MCTargetAsmParser>
@@ -2956,13 +3019,63 @@ StmtResult Sema::ActOnMSAsmStmt(SourceLocation AsmLoc,
   Parser->setAssemblerDialect(1);
   Parser->setTargetParser(*TargetParser.get());
 
-  // TODO: Start parsing.
+  // Prime the lexer.
+  Parser->Lex();
+
+  // Parse the opcode.
+  StringRef IDVal;
+  Parser->ParseIdentifier(IDVal);
+
+  // Canonicalize the opcode to lower case.
+  SmallString<128> Opcode;
+  for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
+    Opcode.push_back(tolower(IDVal[i]));
+
+  // Parse the operands.
+  llvm::SMLoc IDLoc;
+  SmallVector Operands;
+  bool HadError = TargetParser->ParseInstruction(Opcode.str(), IDLoc,
+                                                 Operands);
+  assert (!HadError && "Unexpected error parsing instruction");
+
+  // Match the MCInstr.
+  SmallVector Instrs;
+  HadError = TargetParser->MatchInstruction(IDLoc, Operands, Instrs);
+  assert (!HadError && "Unexpected error matching instruction");
+  assert ((Instrs.size() == 1) && "Expected only a single instruction.");
+
+  // Get the instruction descriptor.
+  llvm::MCInst Inst = Instrs[0];
+  const llvm::MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+  const llvm::MCInstrDesc &Desc = MII->get(Inst.getOpcode());
+  llvm::MCInstPrinter *IP =
+    TheTarget->createMCInstPrinter(1, *MAI, *MII, *MRI, *STI);
+
+  // Build the list of clobbers.
+  for (unsigned i = 0, e = Desc.getNumDefs(); i != e; ++i) {
+    const llvm::MCOperand &Op = Inst.getOperand(i);
+    if (!Op.isReg())
+      continue;
+
+    std::string Reg;
+    llvm::raw_string_ostream OS(Reg);
+    IP->printRegName(OS, Op.getReg());
+
+    StringRef Clobber(OS.str());
+    if (!Context.getTargetInfo().isValidClobber(Clobber))
+      return StmtError(Diag(AsmLoc, diag::err_asm_unknown_register_name) <<
+                       Clobber);
+    ClobberRegs.insert(Reg);
+  }
 }
+
+  for (std::set<std::string>::iterator I = ClobberRegs.begin(),
+         E = ClobberRegs.end(); I != E; ++I)
+    Clobbers.push_back(*I);
 
   MSAsmStmt *NS =
-    new (Context) MSAsmStmt(Context, AsmLoc, IsSimple, /* IsVolatile */ true,
-                            AsmToks, LineEnds, AsmString, Clobbers, EndLoc);
-
+    new (Context) MSAsmStmt(Context, AsmLoc, LBraceLoc, IsSimple,
+                            /*IsVolatile*/ true, AsmToks, Inputs, Outputs,
+                            AsmString, Clobbers, EndLoc);
   return Owned(NS);
 }
 diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
 index c8e4501..4dbf3e4 100644
 --- a/lib/Sema/SemaTemplate.cpp
 +++ b/lib/Sema/SemaTemplate.cpp
 @@ -5518,6 +5518,13 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
   if (Attr)
     ProcessDeclAttributeList(S, Specialization, Attr);
 
+  // Add alignment attributes if necessary; these attributes are checked when
+  // the ASTContext lays out the structure.
+  if (TUK == TUK_Definition) {
+    AddAlignmentAttributesForRecord(Specialization);
+    AddMsStructLayoutForRecord(Specialization);
+  }
+
   if (ModulePrivateLoc.isValid())
     Diag(Specialization->getLocation(), diag::err_module_private_specialization)
       << (isPartialSpecialization? 1 : 0)
 diff --git a/lib/Sema/SemaTemplateInstantiate.cpp b/lib/Sema/SemaTemplateInstantiate.cpp
 index c7cbc41..20e755f 100644
 --- a/lib/Sema/SemaTemplateInstantiate.cpp
 +++ b/lib/Sema/SemaTemplateInstantiate.cpp
 @@ -988,12 +988,11 @@ TemplateInstantiator::RebuildElaboratedType(SourceLocation KeywordLoc,
 
   SourceLocation TagLocation = KeywordLoc;
 
-  // FIXME: type might be anonymous.
   IdentifierInfo *Id = TD->getIdentifier();
 
   // TODO: should we even warn on struct/class mismatches for this?
Seems // like it's likely to produce a lot of spurious errors. - if (Keyword != ETK_None && Keyword != ETK_Typename) { + if (Id && Keyword != ETK_None && Keyword != ETK_Typename) { TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForKeyword(Keyword); if (!SemaRef.isAcceptableTagRedeclaration(TD, Kind, /*isDefinition*/false, TagLocation, *Id)) { diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp index 20fe036..54f8dba 100644 --- a/lib/Sema/SemaType.cpp +++ b/lib/Sema/SemaType.cpp @@ -2258,6 +2258,51 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, ASM = ArrayType::Normal; D.setInvalidType(true); } + + // C99 6.7.5.2p1: The optional type qualifiers and the keyword static + // shall appear only in a declaration of a function parameter with an + // array type, ... + if (ASM == ArrayType::Static || ATI.TypeQuals) { + if (!(D.isPrototypeContext() || + D.getContext() == Declarator::KNRTypeListContext)) { + S.Diag(DeclType.Loc, diag::err_array_static_outside_prototype) << + (ASM == ArrayType::Static ? "'static'" : "type qualifier"); + // Remove the 'static' and the type qualifiers. + if (ASM == ArrayType::Static) + ASM = ArrayType::Normal; + ATI.TypeQuals = 0; + D.setInvalidType(true); + } + + // C99 6.7.5.2p1: ... and then only in the outermost array type + // derivation. + unsigned x = chunkIndex; + while (x != 0) { + // Walk outwards along the declarator chunks. + x--; + const DeclaratorChunk &DC = D.getTypeObject(x); + switch (DC.Kind) { + case DeclaratorChunk::Paren: + continue; + case DeclaratorChunk::Array: + case DeclaratorChunk::Pointer: + case DeclaratorChunk::Reference: + case DeclaratorChunk::MemberPointer: + S.Diag(DeclType.Loc, diag::err_array_static_not_outermost) << + (ASM == ArrayType::Static ? "'static'" : "type qualifier"); + if (ASM == ArrayType::Static) + ASM = ArrayType::Normal; + ATI.TypeQuals = 0; + D.setInvalidType(true); + break; + case DeclaratorChunk::Function: + case DeclaratorChunk::BlockPointer: + // These are invalid anyway, so just ignore. + break; + } + } + } + T = S.BuildArrayType(T, ASM, ArraySize, ATI.TypeQuals, SourceRange(DeclType.Loc, DeclType.EndLoc), Name); break; @@ -3224,7 +3269,6 @@ namespace { assert(Chunk.Kind == DeclaratorChunk::Function); TL.setLocalRangeBegin(Chunk.Loc); TL.setLocalRangeEnd(Chunk.EndLoc); - TL.setTrailingReturn(Chunk.Fun.hasTrailingReturnType()); const DeclaratorChunk::FunctionTypeInfo &FTI = Chunk.Fun; for (unsigned i = 0, e = TL.getNumArgs(), tpi = 0; i != e; ++i) { diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h index 90d5840..619ad33 100644 --- a/lib/Sema/TreeTransform.h +++ b/lib/Sema/TreeTransform.h @@ -1185,10 +1185,10 @@ public: /// By default, performs semantic analysis to build the new statement. /// Subclasses may override this routine to provide different behavior. StmtResult RebuildMSAsmStmt(SourceLocation AsmLoc, + SourceLocation LBraceLoc, ArrayRef AsmToks, - ArrayRef LineEnds, SourceLocation EndLoc) { - return getSema().ActOnMSAsmStmt(AsmLoc, AsmToks, LineEnds, EndLoc); + return getSema().ActOnMSAsmStmt(AsmLoc, LBraceLoc, AsmToks, EndLoc); } /// \brief Build a new Objective-C \@try statement. @@ -1277,12 +1277,11 @@ public: /// By default, performs semantic analysis to build the new statement. /// Subclasses may override this routine to provide different behavior. 
StmtResult RebuildObjCForCollectionStmt(SourceLocation ForLoc, - SourceLocation LParenLoc, Stmt *Element, Expr *Collection, SourceLocation RParenLoc, Stmt *Body) { - StmtResult ForEachStmt = getSema().ActOnObjCForCollectionStmt(ForLoc, LParenLoc, + StmtResult ForEachStmt = getSema().ActOnObjCForCollectionStmt(ForLoc, Element, Collection, RParenLoc); @@ -4205,7 +4204,7 @@ TreeTransform::TransformFunctionProtoType(TypeLocBuilder &TLB, QualType ResultType; - if (TL.getTrailingReturn()) { + if (T->hasTrailingReturn()) { if (getDerived().TransformFunctionTypeParams(TL.getBeginLoc(), TL.getParmArray(), TL.getNumArgs(), @@ -4262,7 +4261,6 @@ TreeTransform::TransformFunctionProtoType(TypeLocBuilder &TLB, FunctionProtoTypeLoc NewTL = TLB.push(Result); NewTL.setLocalRangeBegin(TL.getLocalRangeBegin()); NewTL.setLocalRangeEnd(TL.getLocalRangeEnd()); - NewTL.setTrailingReturn(TL.getTrailingReturn()); for (unsigned i = 0, e = NewTL.getNumArgs(); i != e; ++i) NewTL.setArg(i, ParamDecls[i]); @@ -4286,7 +4284,6 @@ QualType TreeTransform::TransformFunctionNoProtoType( FunctionNoProtoTypeLoc NewTL = TLB.push(Result); NewTL.setLocalRangeBegin(TL.getLocalRangeBegin()); NewTL.setLocalRangeEnd(TL.getLocalRangeEnd()); - NewTL.setTrailingReturn(false); return Result; } @@ -5612,12 +5609,9 @@ StmtResult TreeTransform::TransformMSAsmStmt(MSAsmStmt *S) { ArrayRef AsmToks = llvm::makeArrayRef(S->getAsmToks(), S->getNumAsmToks()); - ArrayRef LineEnds = - llvm::makeArrayRef(S->getLineEnds(), S->getNumLineEnds()); - // No need to transform the asm string literal. - return getDerived().RebuildMSAsmStmt(S->getAsmLoc(), AsmToks, LineEnds, - S->getEndLoc()); + return getDerived().RebuildMSAsmStmt(S->getAsmLoc(), S->getLBraceLoc(), + AsmToks, S->getEndLoc()); } template @@ -5808,7 +5802,6 @@ TreeTransform::TransformObjCForCollectionStmt( // Build a new statement. 
return getDerived().RebuildObjCForCollectionStmt(S->getForLoc(), - /*FIXME:*/S->getForLoc(), Element.get(), Collection.get(), S->getRParenLoc(), diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index beef338..3adbc57 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -4259,7 +4259,6 @@ void TypeLocReader::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) { void TypeLocReader::VisitFunctionTypeLoc(FunctionTypeLoc TL) { TL.setLocalRangeBegin(ReadSourceLocation(Record, Idx)); TL.setLocalRangeEnd(ReadSourceLocation(Record, Idx)); - TL.setTrailingReturn(Record[Idx++]); for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i) { TL.setArg(i, ReadDeclAs(Record, Idx)); } diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index b7718c4..425d2e3 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -486,7 +486,6 @@ void TypeLocWriter::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) { void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) { Writer.AddSourceLocation(TL.getLocalRangeBegin(), Record); Writer.AddSourceLocation(TL.getLocalRangeEnd(), Record); - Record.push_back(TL.getTrailingReturn()); for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i) Writer.AddDeclRef(TL.getArg(i), Record); } diff --git a/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp index 30f45c7..5edcf09 100644 --- a/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp @@ -235,17 +235,20 @@ void CallAndMessageChecker::checkPreStmt(const CallExpr *CE, ProgramStateRef StNonNull, StNull; llvm::tie(StNonNull, StNull) = State->assume(cast(L)); - // FIXME: Do we want to record the non-null assumption here? if (StNull && !StNonNull) { if (!BT_call_null) BT_call_null.reset( new BuiltinBug("Called function pointer is null (null dereference)")); emitBadCall(BT_call_null.get(), C, Callee); } + + C.addTransition(StNonNull); } void CallAndMessageChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + // If this is a call to a C++ method, check if the callee is null or // undefined. if (const CXXInstanceCall *CC = dyn_cast(&Call)) { @@ -258,11 +261,9 @@ void CallAndMessageChecker::checkPreCall(const CallEvent &Call, return; } - ProgramStateRef State = C.getState(); ProgramStateRef StNonNull, StNull; llvm::tie(StNonNull, StNull) = State->assume(cast(V)); - // FIXME: Do we want to record the non-null assumption here? if (StNull && !StNonNull) { if (!BT_cxx_call_null) BT_cxx_call_null.reset(new BuiltinBug("Called C++ object pointer " @@ -270,6 +271,8 @@ void CallAndMessageChecker::checkPreCall(const CallEvent &Call, emitBadCall(BT_cxx_call_null.get(), C, CC->getCXXThisExpr()); return; } + + State = StNonNull; } // Don't check for uninitialized field values in arguments if the @@ -291,6 +294,9 @@ void CallAndMessageChecker::checkPreCall(const CallEvent &Call, Call.getArgExpr(i), /*IsFirstArgument=*/i == 0, checkUninitFields, Call, *BT)) return; + + // If we make it here, record our assumptions about the callee. 
+ C.addTransition(State); } void CallAndMessageChecker::checkPreObjCMessage(const ObjCMethodCall &msg, diff --git a/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp index fea5733..b636efb 100644 --- a/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp +++ b/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -25,7 +25,8 @@ using namespace ento; namespace { class DynamicTypePropagation: - public Checker< check::PostCall, + public Checker< check::PreCall, + check::PostCall, check::PostStmt > { const ObjCObjectType *getObjectTypeForAllocAndNew(const ObjCMessageExpr *MsgE, CheckerContext &C) const; @@ -34,11 +35,70 @@ class DynamicTypePropagation: const ObjCObjectPointerType *getBetterObjCType(const Expr *CastE, CheckerContext &C) const; public: + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; void checkPostStmt(const ImplicitCastExpr *CastE, CheckerContext &C) const; }; } +static void recordFixedType(const MemRegion *Region, const CXXMethodDecl *MD, + CheckerContext &C) { + assert(Region); + assert(MD); + + ASTContext &Ctx = C.getASTContext(); + QualType Ty = Ctx.getPointerType(Ctx.getRecordType(MD->getParent())); + + ProgramStateRef State = C.getState(); + State = State->setDynamicTypeInfo(Region, Ty, /*CanBeSubclass=*/false); + C.addTransition(State); + return; +} + +void DynamicTypePropagation::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (const CXXConstructorCall *Ctor = dyn_cast(&Call)) { + // C++11 [class.cdtor]p4: When a virtual function is called directly or + // indirectly from a constructor or from a destructor, including during + // the construction or destruction of the class’s non-static data members, + // and the object to which the call applies is the object under + // construction or destruction, the function called is the final overrider + // in the constructor's or destructor's class and not one overriding it in + // a more-derived class. + + switch (Ctor->getOriginExpr()->getConstructionKind()) { + case CXXConstructExpr::CK_Complete: + case CXXConstructExpr::CK_Delegating: + // No additional type info necessary. + return; + case CXXConstructExpr::CK_NonVirtualBase: + case CXXConstructExpr::CK_VirtualBase: + if (const MemRegion *Target = Ctor->getCXXThisVal().getAsRegion()) + recordFixedType(Target, Ctor->getDecl(), C); + return; + } + + return; + } + + if (const CXXDestructorCall *Dtor = dyn_cast(&Call)) { + // C++11 [class.cdtor]p4 (see above) + + const MemRegion *Target = Dtor->getCXXThisVal().getAsRegion(); + if (!Target) + return; + + // FIXME: getRuntimeDefinition() can be expensive. It would be better to do + // this when we are entering the stack frame for the destructor. + const Decl *D = Dtor->getRuntimeDefinition().getDecl(); + if (!D) + return; + + recordFixedType(Target, cast(D), C); + return; + } +} + void DynamicTypePropagation::checkPostCall(const CallEvent &Call, CheckerContext &C) const { // We can obtain perfect type info for return values from some calls. @@ -82,6 +142,31 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call, break; } } + + return; + } + + if (const CXXConstructorCall *Ctor = dyn_cast(&Call)) { + // We may need to undo the effects of our pre-call check. + switch (Ctor->getOriginExpr()->getConstructionKind()) { + case CXXConstructExpr::CK_Complete: + case CXXConstructExpr::CK_Delegating: + // No additional work necessary. 
+ // Note: This will leave behind the actual type of the object for + // complete constructors, but arguably that's a good thing, since it + // means the dynamic type info will be correct even for objects + // constructed with operator new. + return; + case CXXConstructExpr::CK_NonVirtualBase: + case CXXConstructExpr::CK_VirtualBase: + if (const MemRegion *Target = Ctor->getCXXThisVal().getAsRegion()) { + // We just finished a base constructor. Now we can use the subclass's + // type when resolving virtual calls. + const Decl *D = C.getLocationContext()->getDecl(); + recordFixedType(Target, cast(D), C); + } + return; + } } } diff --git a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp index 5503b23..3c00d99 100644 --- a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp @@ -1107,18 +1107,6 @@ RetainSummaryManager::getFunctionSummary(const FunctionDecl *FD) { if (S) break; - // Enable this code once the semantics of NSDeallocateObject are resolved - // for GC. -#if 0 - // Handle: NSDeallocateObject(id anObject); - // This method does allow 'nil' (although we don't check it now). - if (strcmp(FName, "NSDeallocateObject") == 0) { - return RetTy == Ctx.VoidTy - ? getPersistentSummary(RetEffect::MakeNoRet(), DoNothing, Dealloc) - : getPersistentStopSummary(); - } -#endif - if (RetTy->isPointerType()) { // For CoreFoundation ('CF') types. if (cocoa::isRefType(RetTy, "CF", FName)) { @@ -1591,28 +1579,12 @@ void RetainSummaryManager::InitializeMethodSummaries() { addClassMethSummary("NSWindow", "alloc", NoTrackYet); -#if 0 - addInstMethSummary("NSWindow", NoTrackYet, "initWithContentRect", - "styleMask", "backing", "defer", NULL); - - addInstMethSummary("NSWindow", NoTrackYet, "initWithContentRect", - "styleMask", "backing", "defer", "screen", NULL); -#endif - // For NSPanel (which subclasses NSWindow), allocated objects are not // self-owned. // FIXME: For now we don't track NSPanels. object for the same reason // as for NSWindow objects. addClassMethSummary("NSPanel", "alloc", NoTrackYet); -#if 0 - addInstMethSummary("NSPanel", NoTrackYet, "initWithContentRect", - "styleMask", "backing", "defer", NULL); - - addInstMethSummary("NSPanel", NoTrackYet, "initWithContentRect", - "styleMask", "backing", "defer", "screen", NULL); -#endif - // Don't track allocated autorelease pools yet, as it is okay to prematurely // exit a method. addClassMethSummary("NSAutoreleasePool", "alloc", NoTrackYet); @@ -1636,62 +1608,6 @@ void RetainSummaryManager::InitializeMethodSummaries() { } //===----------------------------------------------------------------------===// -// AutoreleaseBindings - State used to track objects in autorelease pools. -//===----------------------------------------------------------------------===// -#define AUTORELEASE_POOL_MODELING (0) -// We do not currently have complete modeling of autorelease pools. 
- -#if AUTORELEASE_POOL_MODELING - -typedef llvm::ImmutableMap ARCounts; -typedef llvm::ImmutableMap ARPoolContents; -typedef llvm::ImmutableList ARStack; - -static int AutoRCIndex = 0; -static int AutoRBIndex = 0; - -namespace { class AutoreleasePoolContents {}; } -namespace { class AutoreleaseStack {}; } - -namespace clang { -namespace ento { -template<> struct ProgramStateTrait - : public ProgramStatePartialTrait { - static inline void *GDMIndex() { return &AutoRBIndex; } -}; - -template<> struct ProgramStateTrait - : public ProgramStatePartialTrait { - static inline void *GDMIndex() { return &AutoRCIndex; } -}; -} // end GR namespace -} // end clang namespace - -static SymbolRef GetCurrentAutoreleasePool(ProgramStateRef state) { - ARStack stack = state->get(); - return stack.isEmpty() ? SymbolRef() : stack.getHead(); -} - -static ProgramStateRef -SendAutorelease(ProgramStateRef state, - ARCounts::Factory &F, - SymbolRef sym) { - SymbolRef pool = GetCurrentAutoreleasePool(state); - const ARCounts *cnts = state->get(pool); - ARCounts newCnts(0); - - if (cnts) { - const unsigned *cnt = (*cnts).lookup(sym); - newCnts = F.add(*cnts, sym, cnt ? *cnt + 1 : 1); - } - else - newCnts = F.add(F.getEmptyMap(), sym, 1); - - return state->set(pool, newCnts); -} -#endif - -//===----------------------------------------------------------------------===// // Error reporting. //===----------------------------------------------------------------------===// namespace { @@ -2431,11 +2347,6 @@ class RetainCountChecker mutable OwningPtr Summaries; mutable OwningPtr SummariesGC; - -#if AUTORELEASE_POOL_MODELING - mutable ARCounts::Factory ARCountFactory; -#endif - mutable SummaryLogTy SummaryLog; mutable bool ShouldResetSummaryLog; @@ -2892,15 +2803,7 @@ void RetainCountChecker::checkSummary(const RetainSummary &Summ, ResultTy)); // FIXME: Add a flag to the checker where allocations are assumed to - // *not* fail. (The code below is out-of-date, though.) -#if 0 - if (RE.getKind() == RetEffect::OwnedAllocatedSymbol) { - bool isFeasible; - state = state.assume(loc::SymbolVal(Sym), true, isFeasible); - assert(isFeasible && "Cannot assume fresh symbol is non-null."); - } -#endif - + // *not* fail. break; } @@ -3011,9 +2914,6 @@ RetainCountChecker::updateSymbol(ProgramStateRef state, SymbolRef sym, case NewAutoreleasePool: assert(!C.isObjCGCEnabled()); -#if AUTORELEASE_POOL_MODELING - state = state->add(sym); -#endif return state; case MayEscape: @@ -3030,13 +2930,7 @@ RetainCountChecker::updateSymbol(ProgramStateRef state, SymbolRef sym, case Autorelease: if (C.isObjCGCEnabled()) return state; - // Update the autorelease counts. - // TODO: AutoreleasePoolContents are not currently used. We will need to - // call SendAutorelease after it's wired up. -#if AUTORELEASE_POOL_MODELING - state = SendAutorelease(state, ARCountFactory, sym); -#endif V = V.autorelease(); break; @@ -3719,35 +3613,6 @@ void RetainCountChecker::checkDeadSymbols(SymbolReaper &SymReaper, C.addTransition(state, Pred); } -//===----------------------------------------------------------------------===// -// Debug printing of refcount bindings and autorelease pools. -//===----------------------------------------------------------------------===// - -#if AUTORELEASE_POOL_MODELING -static void PrintPool(raw_ostream &Out, SymbolRef Sym, - ProgramStateRef State) { - Out << ' '; - if (Sym) - Sym->dumpToStream(Out); - else - Out << ""; - Out << ":{"; - - // Get the contents of the pool. 
- if (const ARCounts *Cnts = State->get(Sym)) - for (ARCounts::iterator I = Cnts->begin(), E = Cnts->end(); I != E; ++I) - Out << '(' << I.getKey() << ',' << I.getData() << ')'; - Out << '}'; -} - -static bool UsesAutorelease(ProgramStateRef state) { - // A state uses autorelease if it allocated an autorelease pool or if it has - // objects in the caller's autorelease pool. - return !state->get().isEmpty() || - state->get(SymbolRef()); -} -#endif - void RetainCountChecker::printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const { @@ -3761,20 +3626,6 @@ void RetainCountChecker::printState(raw_ostream &Out, ProgramStateRef State, I->second.print(Out); Out << NL; } - -#if AUTORELEASE_POOL_MODELING - // Print the autorelease stack. - if (UsesAutorelease(State)) { - Out << Sep << NL << "AR pool stack:"; - ARStack Stack = State->get(); - - PrintPool(Out, SymbolRef(), State); // Print the caller's pool. - for (ARStack::iterator I = Stack.begin(), E = Stack.end(); I != E; ++I) - PrintPool(Out, *I, State); - - Out << NL; - } -#endif } //===----------------------------------------------------------------------===// diff --git a/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/lib/StaticAnalyzer/Core/AnalysisManager.cpp index 5aac640..efeba17 100644 --- a/lib/StaticAnalyzer/Core/AnalysisManager.cpp +++ b/lib/StaticAnalyzer/Core/AnalysisManager.cpp @@ -16,7 +16,7 @@ void AnalysisManager::anchor() { } AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, const LangOptions &lang, - PathDiagnosticConsumer *pd, + const PathDiagnosticConsumers &PDC, StoreManagerCreator storemgr, ConstraintManagerCreator constraintmgr, CheckerManager *checkerMgr, @@ -33,7 +33,8 @@ AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, AnalysisInliningMode IMode, bool NoRetry) : AnaCtxMgr(useUnoptimizedCFG, addImplicitDtors, /*addInitializers=*/true), - Ctx(ctx), Diags(diags), LangOpts(lang), PD(pd), + Ctx(ctx), Diags(diags), LangOpts(lang), + PathConsumers(PDC), CreateStoreMgr(storemgr), CreateConstraintMgr(constraintmgr), CheckerMgr(checkerMgr), MaxNodes(maxnodes), MaxVisit(maxvisit), @@ -49,29 +50,19 @@ AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, AnaCtxMgr.getCFGBuildOptions().setAllAlwaysAdd(); } -AnalysisManager::AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, - AnalysisManager &ParentAM) - : AnaCtxMgr(ParentAM.AnaCtxMgr.getUseUnoptimizedCFG(), - ParentAM.AnaCtxMgr.getCFGBuildOptions().AddImplicitDtors, - ParentAM.AnaCtxMgr.getCFGBuildOptions().AddInitializers), - Ctx(ctx), Diags(diags), - LangOpts(ParentAM.LangOpts), PD(ParentAM.getPathDiagnosticConsumer()), - CreateStoreMgr(ParentAM.CreateStoreMgr), - CreateConstraintMgr(ParentAM.CreateConstraintMgr), - CheckerMgr(ParentAM.CheckerMgr), - MaxNodes(ParentAM.MaxNodes), - MaxVisit(ParentAM.MaxVisit), - VisualizeEGDot(ParentAM.VisualizeEGDot), - VisualizeEGUbi(ParentAM.VisualizeEGUbi), - PurgeDead(ParentAM.PurgeDead), - EagerlyAssume(ParentAM.EagerlyAssume), - TrimGraph(ParentAM.TrimGraph), - EagerlyTrimEGraph(ParentAM.EagerlyTrimEGraph), - IPAMode(ParentAM.IPAMode), - InlineMaxStackDepth(ParentAM.InlineMaxStackDepth), - InlineMaxFunctionSize(ParentAM.InlineMaxFunctionSize), - InliningMode(ParentAM.InliningMode), - NoRetryExhausted(ParentAM.NoRetryExhausted) -{ - AnaCtxMgr.getCFGBuildOptions().setAllAlwaysAdd(); +AnalysisManager::~AnalysisManager() { + FlushDiagnostics(); + for (PathDiagnosticConsumers::iterator I = PathConsumers.begin(), + E = 
PathConsumers.end(); I != E; ++I) { + delete *I; + } +} + +void AnalysisManager::FlushDiagnostics() { + PathDiagnosticConsumer::FilesMade filesMade; + for (PathDiagnosticConsumers::iterator I = PathConsumers.begin(), + E = PathConsumers.end(); + I != E; ++I) { + (*I)->FlushDiagnostics(&filesMade); + } } diff --git a/lib/StaticAnalyzer/Core/BugReporter.cpp b/lib/StaticAnalyzer/Core/BugReporter.cpp index 7ba2fa7..571baec 100644 --- a/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -1345,6 +1345,9 @@ BugReport::~BugReport() { for (visitor_iterator I = visitor_begin(), E = visitor_end(); I != E; ++I) { delete *I; } + while (!interestingSymbols.empty()) { + popInterestingSymbolsAndRegions(); + } } const Decl *BugReport::getDeclWithIssue() const { @@ -1386,11 +1389,11 @@ void BugReport::markInteresting(SymbolRef sym) { return; // If the symbol wasn't already in our set, note a configuration change. - if (interestingSymbols.insert(sym).second) + if (getInterestingSymbols().insert(sym).second) ++ConfigurationChangeToken; if (const SymbolMetadata *meta = dyn_cast(sym)) - interestingRegions.insert(meta->getRegion()); + getInterestingRegions().insert(meta->getRegion()); } void BugReport::markInteresting(const MemRegion *R) { @@ -1399,11 +1402,11 @@ void BugReport::markInteresting(const MemRegion *R) { // If the base region wasn't already in our set, note a configuration change. R = R->getBaseRegion(); - if (interestingRegions.insert(R).second) + if (getInterestingRegions().insert(R).second) ++ConfigurationChangeToken; if (const SymbolicRegion *SR = dyn_cast(R)) - interestingSymbols.insert(SR->getSymbol()); + getInterestingSymbols().insert(SR->getSymbol()); } void BugReport::markInteresting(SVal V) { @@ -1411,30 +1414,58 @@ void BugReport::markInteresting(SVal V) { markInteresting(V.getAsSymbol()); } -bool BugReport::isInteresting(SVal V) const { +bool BugReport::isInteresting(SVal V) { return isInteresting(V.getAsRegion()) || isInteresting(V.getAsSymbol()); } -bool BugReport::isInteresting(SymbolRef sym) const { +bool BugReport::isInteresting(SymbolRef sym) { if (!sym) return false; // We don't currently consider metadata symbols to be interesting // even if we know their region is interesting. Is that correct behavior? 
- return interestingSymbols.count(sym); + return getInterestingSymbols().count(sym); } -bool BugReport::isInteresting(const MemRegion *R) const { +bool BugReport::isInteresting(const MemRegion *R) { if (!R) return false; R = R->getBaseRegion(); - bool b = interestingRegions.count(R); + bool b = getInterestingRegions().count(R); if (b) return true; if (const SymbolicRegion *SR = dyn_cast(R)) - return interestingSymbols.count(SR->getSymbol()); + return getInterestingSymbols().count(SR->getSymbol()); return false; } - + +void BugReport::lazyInitializeInterestingSets() { + if (interestingSymbols.empty()) { + interestingSymbols.push_back(new Symbols()); + interestingRegions.push_back(new Regions()); + } +} + +BugReport::Symbols &BugReport::getInterestingSymbols() { + lazyInitializeInterestingSets(); + return *interestingSymbols.back(); +} + +BugReport::Regions &BugReport::getInterestingRegions() { + lazyInitializeInterestingSets(); + return *interestingRegions.back(); +} + +void BugReport::pushInterestingSymbolsAndRegions() { + interestingSymbols.push_back(new Symbols(getInterestingSymbols())); + interestingRegions.push_back(new Regions(getInterestingRegions())); +} + +void BugReport::popInterestingSymbolsAndRegions() { + delete interestingSymbols.back(); + interestingSymbols.pop_back(); + delete interestingRegions.back(); + interestingRegions.pop_back(); +} const Stmt *BugReport::getStmt() const { if (!ErrorNode) @@ -1793,12 +1824,13 @@ static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { } void GRBugReporter::GeneratePathDiagnostic(PathDiagnostic& PD, - SmallVectorImpl &bugReports) { + PathDiagnosticConsumer &PC, + ArrayRef &bugReports) { assert(!bugReports.empty()); SmallVector errorNodes; - for (SmallVectorImpl::iterator I = bugReports.begin(), - E = bugReports.end(); I != E; ++I) { + for (ArrayRef::iterator I = bugReports.begin(), + E = bugReports.end(); I != E; ++I) { errorNodes.push_back((*I)->getErrorNode()); } @@ -1818,8 +1850,7 @@ void GRBugReporter::GeneratePathDiagnostic(PathDiagnostic& PD, const ExplodedNode *N = GPair.second.first; // Start building the path diagnostic... - PathDiagnosticBuilder PDB(*this, R, BackMap.get(), - getPathDiagnosticConsumer()); + PathDiagnosticBuilder PDB(*this, R, BackMap.get(), &PC); // Register additional node visitors. R->addVisitor(new NilReceiverBRVisitor()); @@ -1867,6 +1898,8 @@ void GRBugReporter::GeneratePathDiagnostic(PathDiagnostic& PD, case PathDiagnosticConsumer::Minimal: GenerateMinimalPathDiagnostic(PD, PDB, N, visitors); break; + case PathDiagnosticConsumer::None: + llvm_unreachable("PathDiagnosticConsumer::None should never appear here"); } // Clean up the visitors we used. @@ -2022,53 +2055,21 @@ FindReportInEquivalenceClass(BugReportEquivClass& EQ, return exampleReport; } -//===----------------------------------------------------------------------===// -// DiagnosticCache. This is a hack to cache analyzer diagnostics. It -// uses global state, which eventually should go elsewhere. 
-//===----------------------------------------------------------------------===// -namespace { -class DiagCacheItem : public llvm::FoldingSetNode { - llvm::FoldingSetNodeID ID; -public: - DiagCacheItem(BugReport *R, PathDiagnostic *PD) { - R->Profile(ID); - PD->Profile(ID); - } - - void Profile(llvm::FoldingSetNodeID &id) { - id = ID; - } - - llvm::FoldingSetNodeID &getID() { return ID; } -}; -} - -static bool IsCachedDiagnostic(BugReport *R, PathDiagnostic *PD) { - // FIXME: Eventually this diagnostic cache should reside in something - // like AnalysisManager instead of being a static variable. This is - // really unsafe in the long term. - typedef llvm::FoldingSet DiagnosticCache; - static DiagnosticCache DC; - - void *InsertPos; - DiagCacheItem *Item = new DiagCacheItem(R, PD); - - if (DC.FindNodeOrInsertPos(Item->getID(), InsertPos)) { - delete Item; - return true; - } - - DC.InsertNode(Item, InsertPos); - return false; -} - void BugReporter::FlushReport(BugReportEquivClass& EQ) { SmallVector bugReports; BugReport *exampleReport = FindReportInEquivalenceClass(EQ, bugReports); - if (!exampleReport) - return; - - PathDiagnosticConsumer* PD = getPathDiagnosticConsumer(); + if (exampleReport) { + const PathDiagnosticConsumers &C = getPathDiagnosticConsumers(); + for (PathDiagnosticConsumers::const_iterator I=C.begin(), + E=C.end(); I != E; ++I) { + FlushReport(exampleReport, **I, bugReports); + } + } +} + +void BugReporter::FlushReport(BugReport *exampleReport, + PathDiagnosticConsumer &PD, + ArrayRef bugReports) { // FIXME: Make sure we use the 'R' for the path that was actually used. // Probably doesn't make a difference in practice. @@ -2077,65 +2078,39 @@ void BugReporter::FlushReport(BugReportEquivClass& EQ) { OwningPtr D(new PathDiagnostic(exampleReport->getDeclWithIssue(), exampleReport->getBugType().getName(), - !PD || PD->useVerboseDescription() + PD.useVerboseDescription() ? exampleReport->getDescription() : exampleReport->getShortDescription(), BT.getCategory())); - if (!bugReports.empty()) - GeneratePathDiagnostic(*D.get(), bugReports); - - // Get the meta data. - const BugReport::ExtraTextList &Meta = - exampleReport->getExtraText(); - for (BugReport::ExtraTextList::const_iterator i = Meta.begin(), - e = Meta.end(); i != e; ++i) { - D->addMeta(*i); - } - - // Emit a summary diagnostic to the regular Diagnostics engine. - BugReport::ranges_iterator Beg, End; - llvm::tie(Beg, End) = exampleReport->getRanges(); - DiagnosticsEngine &Diag = getDiagnostic(); - - if (!IsCachedDiagnostic(exampleReport, D.get())) { - // Search the description for '%', as that will be interpretted as a - // format character by FormatDiagnostics. - StringRef desc = exampleReport->getShortDescription(); - - SmallString<512> TmpStr; - llvm::raw_svector_ostream Out(TmpStr); - for (StringRef::iterator I=desc.begin(), E=desc.end(); I!=E; ++I) { - if (*I == '%') - Out << "%%"; - else - Out << *I; - } - - Out.flush(); - unsigned ErrorDiag = Diag.getCustomDiagID(DiagnosticsEngine::Warning, TmpStr); - - DiagnosticBuilder diagBuilder = Diag.Report( - exampleReport->getLocation(getSourceManager()).asLocation(), ErrorDiag); - for (BugReport::ranges_iterator I = Beg; I != End; ++I) - diagBuilder << *I; + // Generate the full path diagnostic, using the generation scheme + // specified by the PathDiagnosticConsumer. 
+ if (PD.getGenerationScheme() != PathDiagnosticConsumer::None) { + if (!bugReports.empty()) + GeneratePathDiagnostic(*D.get(), PD, bugReports); } - // Emit a full diagnostic for the path if we have a PathDiagnosticConsumer. - if (!PD) - return; - + // If the path is empty, generate a single step path with the location + // of the issue. if (D->path.empty()) { - PathDiagnosticPiece *piece = new PathDiagnosticEventPiece( - exampleReport->getLocation(getSourceManager()), - exampleReport->getDescription()); + PathDiagnosticLocation L = exampleReport->getLocation(getSourceManager()); + PathDiagnosticPiece *piece = + new PathDiagnosticEventPiece(L, exampleReport->getDescription()); + BugReport::ranges_iterator Beg, End; + llvm::tie(Beg, End) = exampleReport->getRanges(); for ( ; Beg != End; ++Beg) piece->addRange(*Beg); - D->getActivePath().push_back(piece); } - PD->HandlePathDiagnostic(D.take()); + // Get the meta data. + const BugReport::ExtraTextList &Meta = exampleReport->getExtraText(); + for (BugReport::ExtraTextList::const_iterator i = Meta.begin(), + e = Meta.end(); i != e; ++i) { + D->addMeta(*i); + } + + PD.HandlePathDiagnostic(D.take()); } void BugReporter::EmitBasicReport(const Decl *DeclWithIssue, diff --git a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 46aa9e2..e729587 100644 --- a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -323,7 +323,7 @@ void bugreporter::addTrackNullOrUndefValueVisitor(const ExplodedNode *N, // Walk through lvalue-to-rvalue conversions. const Expr *Ex = dyn_cast(S); if (Ex) { - Ex = Ex->IgnoreParenLValueCasts(); + Ex = Ex->IgnoreParenCasts(); if (const DeclRefExpr *DR = dyn_cast(Ex)) { if (const VarDecl *VD = dyn_cast(DR->getDecl())) { const VarRegion *R = diff --git a/lib/StaticAnalyzer/Core/CallEvent.cpp b/lib/StaticAnalyzer/Core/CallEvent.cpp index e3f4c61..5345bd5 100644 --- a/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -207,10 +207,14 @@ SourceRange CallEvent::getArgSourceRange(unsigned Index) const { return ArgE->getSourceRange(); } +void CallEvent::dump() const { + dump(llvm::errs()); +} + void CallEvent::dump(raw_ostream &Out) const { ASTContext &Ctx = getState()->getStateManager().getContext(); if (const Expr *E = getOriginExpr()) { - E->printPretty(Out, Ctx, 0, Ctx.getPrintingPolicy()); + E->printPretty(Out, 0, Ctx.getPrintingPolicy()); Out << "\n"; return; } @@ -372,47 +376,49 @@ void CXXInstanceCall::getExtraInvalidatedRegions(RegionList &Regions) const { Regions.push_back(R); } -static const CXXMethodDecl *devirtualize(const CXXMethodDecl *MD, SVal ThisVal){ - const MemRegion *R = ThisVal.getAsRegion(); - if (!R) - return 0; - - const TypedValueRegion *TR = dyn_cast(R->StripCasts()); - if (!TR) - return 0; - - const CXXRecordDecl *RD = TR->getValueType()->getAsCXXRecordDecl(); - if (!RD) - return 0; - - const CXXMethodDecl *Result = MD->getCorrespondingMethodInClass(RD); - const FunctionDecl *Definition; - if (!Result->hasBody(Definition)) - return 0; - - return cast(Definition); -} - RuntimeDefinition CXXInstanceCall::getRuntimeDefinition() const { + // Do we have a decl at all? const Decl *D = getDecl(); if (!D) return RuntimeDefinition(); + // If the method is non-virtual, we know we can inline it. 
const CXXMethodDecl *MD = cast(D); if (!MD->isVirtual()) return AnyFunctionCall::getRuntimeDefinition(); - // If the method is virtual, see if we can find the actual implementation - // based on context-sensitivity. - // FIXME: Virtual method calls behave differently when an object is being - // constructed or destructed. It's not as simple as "no devirtualization" - // because a /partially/ constructed object can be referred to through a - // base pointer. We'll eventually want to use DynamicTypeInfo here. - if (const CXXMethodDecl *Devirtualized = devirtualize(MD, getCXXThisVal())) - return RuntimeDefinition(Devirtualized); + // Do we know the implicit 'this' object being called? + const MemRegion *R = getCXXThisVal().getAsRegion(); + if (!R) + return RuntimeDefinition(); - return RuntimeDefinition(); + // Do we know anything about the type of 'this'? + DynamicTypeInfo DynType = getState()->getDynamicTypeInfo(R); + if (!DynType.isValid()) + return RuntimeDefinition(); + + // Is the type a C++ class? (This is mostly a defensive check.) + QualType RegionType = DynType.getType()->getPointeeType(); + const CXXRecordDecl *RD = RegionType->getAsCXXRecordDecl(); + if (!RD || !RD->hasDefinition()) + return RuntimeDefinition(); + + // Find the decl for this method in that class. + const CXXMethodDecl *Result = MD->getCorrespondingMethodInClass(RD, true); + assert(Result && "At the very least the static decl should show up."); + + // Does the decl that we found have an implementation? + const FunctionDecl *Definition; + if (!Result->hasBody(Definition)) + return RuntimeDefinition(); + + // We found a definition. If we're not sure that this devirtualization is + // actually what will happen at runtime, make sure to provide the region so + // that ExprEngine can decide what to do with it. + if (DynType.canBeASubClass()) + return RuntimeDefinition(Definition, R->StripCasts()); + return RuntimeDefinition(Definition, /*DispatchRegion=*/0); } void CXXInstanceCall::getInitialStackFrameContents( @@ -421,16 +427,17 @@ void CXXInstanceCall::getInitialStackFrameContents( AnyFunctionCall::getInitialStackFrameContents(CalleeCtx, Bindings); // Handle the binding of 'this' in the new stack frame. - // We need to make sure we have the proper layering of CXXBaseObjectRegions. SVal ThisVal = getCXXThisVal(); if (!ThisVal.isUnknown()) { ProgramStateManager &StateMgr = getState()->getStateManager(); SValBuilder &SVB = StateMgr.getSValBuilder(); - + const CXXMethodDecl *MD = cast(CalleeCtx->getDecl()); Loc ThisLoc = SVB.getCXXThis(MD, CalleeCtx); - if (const MemRegion *ThisReg = ThisVal.getAsRegion()) { + // If we devirtualized to a different member function, we need to make sure + // we have the proper layering of CXXBaseObjectRegions. + if (MD->getCanonicalDecl() != getDecl()->getCanonicalDecl()) { ASTContext &Ctx = SVB.getContext(); const CXXRecordDecl *Class = MD->getParent(); QualType Ty = Ctx.getPointerType(Ctx.getRecordType(Class)); @@ -439,13 +446,10 @@ void CXXInstanceCall::getInitialStackFrameContents( bool Failed; ThisVal = StateMgr.getStoreManager().evalDynamicCast(ThisVal, Ty, Failed); assert(!Failed && "Calling an incorrectly devirtualized method"); - - // If we couldn't build the correct cast, just strip off all casts. 
- if (ThisVal.isUnknown()) - ThisVal = loc::MemRegionVal(ThisReg->StripCasts()); } - Bindings.push_back(std::make_pair(ThisLoc, ThisVal)); + if (!ThisVal.isUnknown()) + Bindings.push_back(std::make_pair(ThisLoc, ThisVal)); } } @@ -666,6 +670,9 @@ bool ObjCMethodCall::canBeOverridenInSubclass(ObjCInterfaceDecl *IDecl, if (InterfLoc.isValid() && SM.isFromMainFile(InterfLoc)) return false; + // Assume that property accessors are not overridden. + if (getMessageKind() == OCM_PropertyAccess) + return false; // We assume that if the method is public (declared outside of main file) or // has a parent which publicly declares the method, the method could be @@ -853,4 +860,3 @@ CallEventManager::getCaller(const StackFrameContext *CalleeCtx, return getCXXDestructorCall(Dtor, Trigger, ThisVal.getAsRegion(), State, CallerCtx); } - diff --git a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index 8ee6723..3b2e4ec 100644 --- a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -154,6 +154,11 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) { } } + // Generate a CallEvent /before/ cleaning the state, so that we can get the + // correct value for 'this' (if necessary). + CallEventManager &CEMgr = getStateManager().getCallEventManager(); + CallEventRef<> Call = CEMgr.getCaller(calleeCtx, state); + // Step 3: BindedRetNode -> CleanedNodes // If we can find a statement and a block in the inlined function, run remove // dead bindings before returning from the call. This is important to ensure @@ -203,21 +208,21 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) { &Ctx); SaveAndRestore CBISave(currentStmtIdx, calleeCtx->getIndex()); - CallEventManager &CEMgr = getStateManager().getCallEventManager(); - CallEventRef<> Call = CEMgr.getCaller(calleeCtx, CEEState); + CallEventRef<> UpdatedCall = Call.cloneWithState(CEEState); ExplodedNodeSet DstPostCall; - getCheckerManager().runCheckersForPostCall(DstPostCall, CEENode, *Call, - *this, true); + getCheckerManager().runCheckersForPostCall(DstPostCall, CEENode, + *UpdatedCall, *this, + /*WasInlined=*/true); ExplodedNodeSet Dst; - if (isa(Call)) { - getCheckerManager().runCheckersForPostObjCMessage(Dst, DstPostCall, - cast(*Call), - *this, true); + if (const ObjCMethodCall *Msg = dyn_cast(Call)) { + getCheckerManager().runCheckersForPostObjCMessage(Dst, DstPostCall, *Msg, + *this, + /*WasInlined=*/true); } else if (CE) { getCheckerManager().runCheckersForPostStmt(Dst, DstPostCall, CE, - *this, true); + *this, /*WasInlined=*/true); } else { Dst.insert(DstPostCall); } @@ -555,12 +560,20 @@ void ExprEngine::defaultEvalCall(NodeBuilder &Bldr, ExplodedNode *Pred, RuntimeDefinition RD = Call->getRuntimeDefinition(); const Decl *D = RD.getDecl(); if (D) { - // Explore with and without inlining the call. - if (RD.mayHaveOtherDefinitions() && - getAnalysisManager().IPAMode == DynamicDispatchBifurcate) { - BifurcateCall(RD.getDispatchRegion(), *Call, D, Bldr, Pred); - return; + if (RD.mayHaveOtherDefinitions()) { + // Explore with and without inlining the call. + if (getAnalysisManager().IPAMode == DynamicDispatchBifurcate) { + BifurcateCall(RD.getDispatchRegion(), *Call, D, Bldr, Pred); + return; + } + + // Don't inline if we're not in any dynamic dispatch mode. 
+ if (getAnalysisManager().IPAMode != DynamicDispatch) { + conservativeEvalCall(*Call, Bldr, Pred, State); + return; + } } + // We are not bifurcating and we do have a Decl, so just inline. if (inlineCall(*Call, D, Bldr, Pred, State)) return; @@ -575,6 +588,7 @@ void ExprEngine::BifurcateCall(const MemRegion *BifurReg, const CallEvent &Call, const Decl *D, NodeBuilder &Bldr, ExplodedNode *Pred) { assert(BifurReg); + BifurReg = BifurReg->StripCasts(); // Check if we've performed the split already - note, we only want // to split the path once per memory region. diff --git a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 0152e32..982bcbf 100644 --- a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -45,7 +45,7 @@ public: virtual ~HTMLDiagnostics() { FlushDiagnostics(NULL); } virtual void FlushDiagnosticsImpl(std::vector &Diags, - SmallVectorImpl *FilesMade); + FilesMade *filesMade); virtual StringRef getName() const { return "HTMLDiagnostics"; @@ -63,7 +63,7 @@ public: const char *HighlightEnd = ""); void ReportDiag(const PathDiagnostic& D, - SmallVectorImpl *FilesMade); + FilesMade *filesMade); }; } // end anonymous namespace @@ -76,10 +76,10 @@ HTMLDiagnostics::HTMLDiagnostics(const std::string& prefix, FilePrefix.appendComponent("report"); } -PathDiagnosticConsumer* -ento::createHTMLDiagnosticConsumer(const std::string& prefix, - const Preprocessor &PP) { - return new HTMLDiagnostics(prefix, PP); +void ento::createHTMLDiagnosticConsumer(PathDiagnosticConsumers &C, + const std::string& prefix, + const Preprocessor &PP) { + C.push_back(new HTMLDiagnostics(prefix, PP)); } //===----------------------------------------------------------------------===// @@ -88,15 +88,15 @@ ento::createHTMLDiagnosticConsumer(const std::string& prefix, void HTMLDiagnostics::FlushDiagnosticsImpl( std::vector &Diags, - SmallVectorImpl *FilesMade) { + FilesMade *filesMade) { for (std::vector::iterator it = Diags.begin(), et = Diags.end(); it != et; ++it) { - ReportDiag(**it, FilesMade); + ReportDiag(**it, filesMade); } } void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, - SmallVectorImpl *FilesMade) { + FilesMade *filesMade) { // Create the HTML directory if it is missing. if (!createdDir) { @@ -266,8 +266,10 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, return; } - if (FilesMade) - FilesMade->push_back(llvm::sys::path::filename(H.str())); + if (filesMade) { + filesMade->push_back(std::make_pair(StringRef(getName()), + llvm::sys::path::filename(H.str()))); + } // Emit the HTML to disk. for (RewriteBuffer::iterator I = Buf->begin(), E = Buf->end(); I!=E; ++I) @@ -480,29 +482,11 @@ void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID, R.InsertTextBefore(Loc, os.str()); // Now highlight the ranges. - for (const SourceRange *I = P.ranges_begin(), *E = P.ranges_end(); - I != E; ++I) + ArrayRef Ranges = P.getRanges(); + for (ArrayRef::iterator I = Ranges.begin(), + E = Ranges.end(); I != E; ++I) { HighlightRange(R, LPosInfo.first, *I); - -#if 0 - // If there is a code insertion hint, insert that code. - // FIXME: This code is disabled because it seems to mangle the HTML - // output. I'm leaving it here because it's generally the right idea, - // but needs some help from someone more familiar with the rewriter. 
- for (const FixItHint *Hint = P.fixit_begin(), *HintEnd = P.fixit_end(); - Hint != HintEnd; ++Hint) { - if (Hint->RemoveRange.isValid()) { - HighlightRange(R, LPosInfo.first, Hint->RemoveRange, - "", ""); - } - if (Hint->InsertionLoc.isValid()) { - std::string EscapedCode = html::EscapeText(Hint->CodeToInsert, true); - EscapedCode = "" + EscapedCode - + ""; - R.InsertTextBefore(Hint->InsertionLoc, EscapedCode); - } } -#endif } static void EmitAlphaCounter(raw_ostream &os, unsigned n) { diff --git a/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/lib/StaticAnalyzer/Core/PathDiagnostic.cpp index 7d52aac..c849778 100644 --- a/lib/StaticAnalyzer/Core/PathDiagnostic.cpp +++ b/lib/StaticAnalyzer/Core/PathDiagnostic.cpp @@ -157,13 +157,13 @@ void PathDiagnosticConsumer::HandlePathDiagnostic(PathDiagnostic *D) { return; // FIXME: Emit a warning? // Check the source ranges. - for (PathDiagnosticPiece::range_iterator RI = piece->ranges_begin(), - RE = piece->ranges_end(); - RI != RE; ++RI) { - SourceLocation L = SMgr.getExpansionLoc(RI->getBegin()); + ArrayRef Ranges = piece->getRanges(); + for (ArrayRef::iterator I = Ranges.begin(), + E = Ranges.end(); I != E; ++I) { + SourceLocation L = SMgr.getExpansionLoc(I->getBegin()); if (!L.isFileID() || SMgr.getFileID(L) != FID) return; // FIXME: Emit a warning? - L = SMgr.getExpansionLoc(RI->getEnd()); + L = SMgr.getExpansionLoc(I->getEnd()); if (!L.isFileID() || SMgr.getFileID(L) != FID) return; // FIXME: Emit a warning? } @@ -240,8 +240,8 @@ struct CompareDiagnostics { }; } -void -PathDiagnosticConsumer::FlushDiagnostics(SmallVectorImpl *Files) { +void PathDiagnosticConsumer::FlushDiagnostics( + PathDiagnosticConsumer::FilesMade *Files) { if (flushed) return; @@ -718,7 +718,9 @@ void PathDiagnosticPiece::Profile(llvm::FoldingSetNodeID &ID) const { ID.AddString(str); // FIXME: Add profiling support for code hints. 
ID.AddInteger((unsigned) getDisplayHint()); - for (range_iterator I = ranges_begin(), E = ranges_end(); I != E; ++I) { + ArrayRef Ranges = getRanges(); + for (ArrayRef::iterator I = Ranges.begin(), E = Ranges.end(); + I != E; ++I) { ID.AddInteger(I->getBegin().getRawEncoding()); ID.AddInteger(I->getEnd().getRawEncoding()); } diff --git a/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp index 58a4bba..d5fdd9d 100644 --- a/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp +++ b/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -30,23 +30,21 @@ namespace { class PlistDiagnostics : public PathDiagnosticConsumer { const std::string OutputFile; const LangOptions &LangOpts; - OwningPtr SubPD; const bool SupportsCrossFileDiagnostics; public: PlistDiagnostics(const std::string& prefix, const LangOptions &LangOpts, - bool supportsMultipleFiles, - PathDiagnosticConsumer *subPD); + bool supportsMultipleFiles); virtual ~PlistDiagnostics() {} void FlushDiagnosticsImpl(std::vector &Diags, - SmallVectorImpl *FilesMade); + FilesMade *filesMade); virtual StringRef getName() const { return "PlistDiagnostics"; } - PathGenerationScheme getGenerationScheme() const; + PathGenerationScheme getGenerationScheme() const { return Extensive; } bool supportsLogicalOpControlFlow() const { return true; } bool supportsAllBlockEdges() const { return true; } virtual bool useVerboseDescription() const { return false; } @@ -58,29 +56,20 @@ namespace { PlistDiagnostics::PlistDiagnostics(const std::string& output, const LangOptions &LO, - bool supportsMultipleFiles, - PathDiagnosticConsumer *subPD) - : OutputFile(output), LangOpts(LO), SubPD(subPD), + bool supportsMultipleFiles) + : OutputFile(output), LangOpts(LO), SupportsCrossFileDiagnostics(supportsMultipleFiles) {} -PathDiagnosticConsumer* -ento::createPlistDiagnosticConsumer(const std::string& s, const Preprocessor &PP, - PathDiagnosticConsumer *subPD) { - return new PlistDiagnostics(s, PP.getLangOpts(), false, subPD); +void ento::createPlistDiagnosticConsumer(PathDiagnosticConsumers &C, + const std::string& s, + const Preprocessor &PP) { + C.push_back(new PlistDiagnostics(s, PP.getLangOpts(), false)); } -PathDiagnosticConsumer* -ento::createPlistMultiFileDiagnosticConsumer(const std::string &s, - const Preprocessor &PP) { - return new PlistDiagnostics(s, PP.getLangOpts(), true, 0); -} - -PathDiagnosticConsumer::PathGenerationScheme -PlistDiagnostics::getGenerationScheme() const { - if (const PathDiagnosticConsumer *PD = SubPD.get()) - return PD->getGenerationScheme(); - - return Extensive; +void ento::createPlistMultiFileDiagnosticConsumer(PathDiagnosticConsumers &C, + const std::string &s, + const Preprocessor &PP) { + C.push_back(new PlistDiagnostics(s, PP.getLangOpts(), true)); } static void AddFID(FIDMap &FIDs, SmallVectorImpl &V, @@ -231,15 +220,16 @@ static void ReportEvent(raw_ostream &o, const PathDiagnosticPiece& P, EmitLocation(o, SM, LangOpts, L, FM, indent); // Output the ranges (if any). 
- PathDiagnosticPiece::range_iterator RI = P.ranges_begin(), - RE = P.ranges_end(); + ArrayRef Ranges = P.getRanges(); - if (RI != RE) { + if (!Ranges.empty()) { Indent(o, indent) << "ranges\n"; Indent(o, indent) << "\n"; ++indent; - for (; RI != RE; ++RI) - EmitRange(o, SM, LangOpts, *RI, FM, indent+1); + for (ArrayRef::iterator I = Ranges.begin(), E = Ranges.end(); + I != E; ++I) { + EmitRange(o, SM, LangOpts, *I, FM, indent+1); + } --indent; Indent(o, indent) << "\n"; } @@ -353,7 +343,7 @@ static void ReportPiece(raw_ostream &o, void PlistDiagnostics::FlushDiagnosticsImpl( std::vector &Diags, - SmallVectorImpl *FilesMade) { + FilesMade *filesMade) { // Build up a set of FIDs that we use by scanning the locations and // ranges of the diagnostics. FIDMap FM; @@ -380,11 +370,11 @@ void PlistDiagnostics::FlushDiagnosticsImpl( I!=E; ++I) { const PathDiagnosticPiece *piece = I->getPtr(); AddFID(FM, Fids, SM, piece->getLocation().asLocation()); - - for (PathDiagnosticPiece::range_iterator RI = piece->ranges_begin(), - RE= piece->ranges_end(); RI != RE; ++RI) { - AddFID(FM, Fids, SM, RI->getBegin()); - AddFID(FM, Fids, SM, RI->getEnd()); + ArrayRef Ranges = piece->getRanges(); + for (ArrayRef::iterator I = Ranges.begin(), + E = Ranges.end(); I != E; ++I) { + AddFID(FM, Fids, SM, I->getBegin()); + AddFID(FM, Fids, SM, I->getEnd()); } if (const PathDiagnosticCallPiece *call = @@ -507,19 +497,21 @@ void PlistDiagnostics::FlushDiagnosticsImpl( EmitLocation(o, *SM, LangOpts, D->getLocation(), FM, 2); // Output the diagnostic to the sub-diagnostic client, if any. - if (SubPD) { - std::vector SubDiags; - SubDiags.push_back(D); - SmallVector SubFilesMade; - SubPD->FlushDiagnosticsImpl(SubDiags, &SubFilesMade); - - if (!SubFilesMade.empty()) { - o << " " << SubPD->getName() << "_files\n"; - o << " \n"; - for (size_t i = 0, n = SubFilesMade.size(); i < n ; ++i) - o << " " << SubFilesMade[i] << "\n"; - o << " \n"; + if (!filesMade->empty()) { + StringRef lastName; + for (FilesMade::iterator I = filesMade->begin(), E = filesMade->end(); + I != E; ++I) { + StringRef newName = I->first; + if (newName != lastName) { + if (!lastName.empty()) + o << " \n"; + lastName = newName; + o << " " << lastName << "_files\n"; + o << " \n"; + } + o << " " << I->second << "\n"; } + o << " \n"; } // Close up the entry. @@ -531,6 +523,8 @@ void PlistDiagnostics::FlushDiagnosticsImpl( // Finish. o << "\n"; - if (FilesMade) - FilesMade->push_back(OutputFile); + if (filesMade) { + StringRef Name(getName()); + filesMade->push_back(std::make_pair(Name, OutputFile)); + } } diff --git a/lib/StaticAnalyzer/Core/ProgramState.cpp b/lib/StaticAnalyzer/Core/ProgramState.cpp index dc988cc..2000338 100644 --- a/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -745,14 +745,16 @@ template<> struct ProgramStateTrait }} DynamicTypeInfo ProgramState::getDynamicTypeInfo(const MemRegion *Reg) const { + Reg = Reg->StripCasts(); + // Look up the dynamic type in the GDM. const DynamicTypeInfo *GDMType = get(Reg); if (GDMType) return *GDMType; // Otherwise, fall back to what we know about the region. 
- if (const TypedValueRegion *TR = dyn_cast(Reg)) - return DynamicTypeInfo(TR->getValueType()); + if (const TypedRegion *TR = dyn_cast(Reg)) + return DynamicTypeInfo(TR->getLocationType(), /*CanBeSubclass=*/false); if (const SymbolicRegion *SR = dyn_cast(Reg)) { SymbolRef Sym = SR->getSymbol(); @@ -764,6 +766,7 @@ DynamicTypeInfo ProgramState::getDynamicTypeInfo(const MemRegion *Reg) const { ProgramStateRef ProgramState::setDynamicTypeInfo(const MemRegion *Reg, DynamicTypeInfo NewTy) const { + Reg = Reg->StripCasts(); ProgramStateRef NewState = set(Reg, NewTy); assert(NewState); return NewState; diff --git a/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp b/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp index e5b8553..66bf4bb 100644 --- a/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp +++ b/lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp @@ -32,7 +32,7 @@ public: : OutputFile(output), Diag(diag) {} void FlushDiagnosticsImpl(std::vector &Diags, - SmallVectorImpl *FilesMade); + FilesMade *filesMade); virtual StringRef getName() const { return "TextPathDiagnostics"; @@ -47,15 +47,15 @@ public: } // end anonymous namespace -PathDiagnosticConsumer* -ento::createTextPathDiagnosticConsumer(const std::string& out, - const Preprocessor &PP) { - return new TextPathDiagnostics(out, PP.getDiagnostics()); +void ento::createTextPathDiagnosticConsumer(PathDiagnosticConsumers &C, + const std::string& out, + const Preprocessor &PP) { + C.push_back(new TextPathDiagnostics(out, PP.getDiagnostics())); } void TextPathDiagnostics::FlushDiagnosticsImpl( std::vector &Diags, - SmallVectorImpl *FilesMade) { + FilesMade *) { for (std::vector::iterator it = Diags.begin(), et = Diags.end(); it != et; ++it) { const PathDiagnostic *D = *it; diff --git a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index fcdaaea..34b5266 100644 --- a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -64,14 +64,55 @@ STATISTIC(MaxCFGSize, "The maximum number of basic blocks in a function."); // Special PathDiagnosticConsumers. 
//===----------------------------------------------------------------------===// -static PathDiagnosticConsumer* -createPlistHTMLDiagnosticConsumer(const std::string& prefix, - const Preprocessor &PP) { - PathDiagnosticConsumer *PD = - createHTMLDiagnosticConsumer(llvm::sys::path::parent_path(prefix), PP); - return createPlistDiagnosticConsumer(prefix, PP, PD); +static void createPlistHTMLDiagnosticConsumer(PathDiagnosticConsumers &C, + const std::string &prefix, + const Preprocessor &PP) { + createHTMLDiagnosticConsumer(C, llvm::sys::path::parent_path(prefix), PP); + createPlistDiagnosticConsumer(C, prefix, PP); } +namespace { +class ClangDiagPathDiagConsumer : public PathDiagnosticConsumer { + DiagnosticsEngine &Diag; +public: + ClangDiagPathDiagConsumer(DiagnosticsEngine &Diag) : Diag(Diag) {} + virtual ~ClangDiagPathDiagConsumer() {} + virtual StringRef getName() const { return "ClangDiags"; } + virtual bool useVerboseDescription() const { return false; } + virtual PathGenerationScheme getGenerationScheme() const { return None; } + + void FlushDiagnosticsImpl(std::vector &Diags, + FilesMade *filesMade) { + for (std::vector::iterator I = Diags.begin(), + E = Diags.end(); I != E; ++I) { + const PathDiagnostic *PD = *I; + StringRef desc = PD->getDescription(); + SmallString<512> TmpStr; + llvm::raw_svector_ostream Out(TmpStr); + for (StringRef::iterator I=desc.begin(), E=desc.end(); I!=E; ++I) { + if (*I == '%') + Out << "%%"; + else + Out << *I; + } + Out.flush(); + unsigned ErrorDiag = Diag.getCustomDiagID(DiagnosticsEngine::Warning, + TmpStr); + SourceLocation L = PD->getLocation().asLocation(); + DiagnosticBuilder diagBuilder = Diag.Report(L, ErrorDiag); + + // Get the ranges from the last point in the path. + ArrayRef Ranges = PD->path.back()->getRanges(); + + for (ArrayRef::iterator I = Ranges.begin(), + E = Ranges.end(); I != E; ++I) { + diagBuilder << *I; + } + } + } +}; +} // end anonymous namespace + //===----------------------------------------------------------------------===// // AnalysisConsumer declaration. //===----------------------------------------------------------------------===// @@ -105,8 +146,8 @@ public: /// working with a PCH file. SetOfDecls LocalTUDecls; - // PD is owned by AnalysisManager. - PathDiagnosticConsumer *PD; + // Set of PathDiagnosticConsumers. Owned by AnalysisManager. + PathDiagnosticConsumers PathConsumers; StoreManagerCreator CreateStoreMgr; ConstraintManagerCreator CreateConstraintMgr; @@ -126,7 +167,7 @@ public: const AnalyzerOptions& opts, ArrayRef plugins) : RecVisitorMode(ANALYSIS_ALL), RecVisitorBR(0), - Ctx(0), PP(pp), OutDir(outdir), Opts(opts), Plugins(plugins), PD(0) { + Ctx(0), PP(pp), OutDir(outdir), Opts(opts), Plugins(plugins) { DigestAnalyzerOptions(); if (Opts.PrintStats) { llvm::EnableStatistics(); @@ -141,17 +182,19 @@ public: void DigestAnalyzerOptions() { // Create the PathDiagnosticConsumer. + PathConsumers.push_back(new ClangDiagPathDiagConsumer(PP.getDiagnostics())); + if (!OutDir.empty()) { switch (Opts.AnalysisDiagOpt) { default: #define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATEFN, AUTOCREATE) \ - case PD_##NAME: PD = CREATEFN(OutDir, PP); break; + case PD_##NAME: CREATEFN(PathConsumers, OutDir, PP); break; #include "clang/Frontend/Analyses.def" } } else if (Opts.AnalysisDiagOpt == PD_TEXT) { // Create the text client even without a specified output file since // it just uses diagnostic notes. 
- PD = createTextPathDiagnosticConsumer("", PP); + createTextPathDiagnosticConsumer(PathConsumers, "", PP); } // Create the analyzer component creators. @@ -205,9 +248,12 @@ public: Ctx = &Context; checkerMgr.reset(createCheckerManager(Opts, PP.getLangOpts(), Plugins, PP.getDiagnostics())); - Mgr.reset(new AnalysisManager(*Ctx, PP.getDiagnostics(), - PP.getLangOpts(), PD, - CreateStoreMgr, CreateConstraintMgr, + Mgr.reset(new AnalysisManager(*Ctx, + PP.getDiagnostics(), + PP.getLangOpts(), + PathConsumers, + CreateStoreMgr, + CreateConstraintMgr, checkerMgr.get(), Opts.MaxNodes, Opts.MaxLoop, Opts.VisualizeEGDot, Opts.VisualizeEGUbi, -- cgit v1.1 From 6c5ad005bed33e80c94460b6694d199348dac472 Mon Sep 17 00:00:00 2001 From: grehan Date: Sun, 26 Aug 2012 01:41:41 +0000 Subject: Add sysctls to display the total and free amount of hard-wired mem for VMs # sysctl hw.vmm hw.vmm.mem_free: 2145386496 hw.vmm.mem_total: 2145386496 Submitted by: Takeshi HASEGAWA hasegaw at gmail com --- lib/libvmmapi/vmmapi.c | 24 ++++++++++++++++++++++++ lib/libvmmapi/vmmapi.h | 2 ++ 2 files changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 5882bd2..d7e6143 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -117,6 +117,30 @@ vm_destroy(struct vmctx *vm) free(vm); } +size_t +vmm_get_mem_total(void) +{ + size_t mem_total = 0; + size_t oldlen = sizeof(mem_total); + int error; + error = sysctlbyname("hw.vmm.mem_total", &mem_total, &oldlen, NULL, 0); + if (error) + return -1; + return mem_total; +} + +size_t +vmm_get_mem_free(void) +{ + size_t mem_free = 0; + size_t oldlen = sizeof(mem_free); + int error; + error = sysctlbyname("hw.vmm.mem_free", &mem_free, &oldlen, NULL, 0); + if (error) + return -1; + return mem_free; +} + int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, vm_paddr_t *ret_hpa, size_t *ret_len) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index de3d4b7..516bbc3 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -34,6 +34,8 @@ struct vmctx; int vm_create(const char *name); struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); +size_t vmm_get_mem_total(void); +size_t vmm_get_mem_free(void); int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, vm_paddr_t *ret_hpa, size_t *ret_len); /* -- cgit v1.1 From be47cf2d63c5d79e95cc63a17f7dcefd3035e5da Mon Sep 17 00:00:00 2001 From: dougb Date: Thu, 20 Sep 2012 03:38:28 +0000 Subject: Vendor import of BIND 9.8.3-P3 --- lib/dns/include/dns/rdata.h | 11 +++++++++++ lib/dns/master.c | 2 +- lib/dns/rdata.c | 28 ++++++++++++++++++++++------ lib/dns/rdataslab.c | 1 + 4 files changed, 35 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/dns/include/dns/rdata.h b/lib/dns/include/dns/rdata.h index e3183c0..c3e7db6 100644 --- a/lib/dns/include/dns/rdata.h +++ b/lib/dns/include/dns/rdata.h @@ -147,6 +147,17 @@ struct dns_rdata { (((rdata)->flags & ~(DNS_RDATA_UPDATE|DNS_RDATA_OFFLINE)) == 0) /* + * The maximum length of a RDATA that can be sent on the wire. + * Max packet size (65535) less header (12), less name (1), type (2), + * class (2), ttl(4), length (2). + * + * None of the defined types that support name compression can exceed + * this and all new types are to be sent uncompressed. + */ + +#define DNS_RDATA_MAXLENGTH 65512U + +/* * Flags affecting rdata formatting style. Flags 0xFFFF0000 * are used by masterfile-level formatting and defined elsewhere. * See additional comments at dns_rdata_tofmttext(). 
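A minimal standalone C sketch of the arithmetic behind DNS_RDATA_MAXLENGTH above (an illustrative aside, not part of the imported BIND sources): the 65512 constant is simply the maximum DNS message size minus the fixed per-record wire overhead enumerated in the comment.

    #include <assert.h>

    int
    main(void)
    {
    	/* Max message size (65535) less header (12), owner name (1), */
    	/* type (2), class (2), TTL (4) and RDLENGTH (2) octets. */
    	unsigned int max_rdata = 65535 - 12 - 1 - 2 - 2 - 4 - 2;

    	assert(max_rdata == 65512);	/* == DNS_RDATA_MAXLENGTH */
    	return (0);
    }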
diff --git a/lib/dns/master.c b/lib/dns/master.c index ae07e55..7f6cf58 100644 --- a/lib/dns/master.c +++ b/lib/dns/master.c @@ -75,7 +75,7 @@ /*% * max message size - header - root - type - class - ttl - rdlen */ -#define MINTSIZ (65535 - 12 - 1 - 2 - 2 - 4 - 2) +#define MINTSIZ DNS_RDATA_MAXLENGTH /*% * Size for tokens in the presentation format, * The largest tokens are the base64 blocks in KEY and CERT records, diff --git a/lib/dns/rdata.c b/lib/dns/rdata.c index f6807d1..d200f1b 100644 --- a/lib/dns/rdata.c +++ b/lib/dns/rdata.c @@ -429,6 +429,7 @@ dns_rdata_fromwire(dns_rdata_t *rdata, dns_rdataclass_t rdclass, isc_buffer_t st; isc_boolean_t use_default = ISC_FALSE; isc_uint32_t activelength; + size_t length; REQUIRE(dctx != NULL); if (rdata != NULL) { @@ -459,6 +460,14 @@ dns_rdata_fromwire(dns_rdata_t *rdata, dns_rdataclass_t rdclass, } /* + * Reject any rdata that expands out to more than DNS_RDATA_MAXLENGTH + * as we cannot transmit it. + */ + length = isc_buffer_usedlength(target) - isc_buffer_usedlength(&st); + if (result == ISC_R_SUCCESS && length > DNS_RDATA_MAXLENGTH) + result = DNS_R_FORMERR; + + /* * We should have consumed all of our buffer. */ if (result == ISC_R_SUCCESS && !buffer_empty(source)) @@ -466,8 +475,7 @@ dns_rdata_fromwire(dns_rdata_t *rdata, dns_rdataclass_t rdclass, if (rdata != NULL && result == ISC_R_SUCCESS) { region.base = isc_buffer_used(&st); - region.length = isc_buffer_usedlength(target) - - isc_buffer_usedlength(&st); + region.length = length; dns_rdata_fromregion(rdata, rdclass, type, ®ion); } @@ -602,6 +610,7 @@ dns_rdata_fromtext(dns_rdata_t *rdata, dns_rdataclass_t rdclass, unsigned long line; void (*callback)(dns_rdatacallbacks_t *, const char *, ...); isc_result_t tresult; + size_t length; REQUIRE(origin == NULL || dns_name_isabsolute(origin) == ISC_TRUE); if (rdata != NULL) { @@ -673,10 +682,13 @@ dns_rdata_fromtext(dns_rdata_t *rdata, dns_rdataclass_t rdclass, } } while (1); + length = isc_buffer_usedlength(target) - isc_buffer_usedlength(&st); + if (result == ISC_R_SUCCESS && length > DNS_RDATA_MAXLENGTH) + result = ISC_R_NOSPACE; + if (rdata != NULL && result == ISC_R_SUCCESS) { region.base = isc_buffer_used(&st); - region.length = isc_buffer_usedlength(target) - - isc_buffer_usedlength(&st); + region.length = length; dns_rdata_fromregion(rdata, rdclass, type, ®ion); } if (result != ISC_R_SUCCESS) { @@ -804,6 +816,7 @@ dns_rdata_fromstruct(dns_rdata_t *rdata, dns_rdataclass_t rdclass, isc_buffer_t st; isc_region_t region; isc_boolean_t use_default = ISC_FALSE; + size_t length; REQUIRE(source != NULL); if (rdata != NULL) { @@ -818,10 +831,13 @@ dns_rdata_fromstruct(dns_rdata_t *rdata, dns_rdataclass_t rdclass, if (use_default) (void)NULL; + length = isc_buffer_usedlength(target) - isc_buffer_usedlength(&st); + if (result == ISC_R_SUCCESS && length > DNS_RDATA_MAXLENGTH) + result = ISC_R_NOSPACE; + if (rdata != NULL && result == ISC_R_SUCCESS) { region.base = isc_buffer_used(&st); - region.length = isc_buffer_usedlength(target) - - isc_buffer_usedlength(&st); + region.length = length; dns_rdata_fromregion(rdata, rdclass, type, ®ion); } if (result != ISC_R_SUCCESS) diff --git a/lib/dns/rdataslab.c b/lib/dns/rdataslab.c index 6fbfdd7..cb9ae54 100644 --- a/lib/dns/rdataslab.c +++ b/lib/dns/rdataslab.c @@ -305,6 +305,7 @@ dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, length = x[i].rdata.length; if (rdataset->type == dns_rdatatype_rrsig) length++; + INSIST(length <= 0xffff); *rawbuf++ = (length & 0xff00) >> 8; *rawbuf++ = 
(length & 0x00ff); #if DNS_RDATASET_FIXED -- cgit v1.1 From ebdd69568d7fa97153aa47a86afe367476a0a1de Mon Sep 17 00:00:00 2001 From: neel Date: Tue, 25 Sep 2012 19:08:51 +0000 Subject: Add ioctls to control the X2APIC capability exposed by the virtual machine to the guest. At the moment this simply sets the state in the 'vcpu' instance but there is no code that acts upon these settings. --- lib/libvmmapi/vmmapi.c | 29 +++++++++++++++++++++++++++++ lib/libvmmapi/vmmapi.h | 4 ++++ 2 files changed, 33 insertions(+) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index d7e6143..78ed368 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -537,6 +537,35 @@ vm_get_stat_desc(struct vmctx *ctx, int index) return (NULL); } +int +vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) +{ + int error; + struct vm_x2apic x2apic; + + bzero(&x2apic, sizeof(x2apic)); + x2apic.cpuid = vcpu; + + error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); + *state = x2apic.state; + return (error); +} + +int +vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) +{ + int error; + struct vm_x2apic x2apic; + + bzero(&x2apic, sizeof(x2apic)); + x2apic.cpuid = vcpu; + x2apic.state = state; + + error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); + + return (error); +} + /* * From Intel Vol 3a: * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 516bbc3..b918409 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -30,6 +30,7 @@ #define _VMMAPI_H_ struct vmctx; +enum x2apic_state; int vm_create(const char *name); struct vmctx *vm_open(const char *name); @@ -90,6 +91,9 @@ uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries); const char *vm_get_stat_desc(struct vmctx *ctx, int index); +int vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s); +int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s); + /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); -- cgit v1.1 From 18dd2c0d511c600e708ac8f756e8e51151b43656 Mon Sep 17 00:00:00 2001 From: neel Date: Thu, 4 Oct 2012 02:27:14 +0000 Subject: Change vm_malloc() to map pages in the guest physical address space in 4KB chunks. This breaks the assumption that the entire memory segment is contiguously allocated in the host physical address space. This also paves the way to satisfy the 4KB page allocations by requesting free pages from the VM subsystem as opposed to hard-partitioning host memory at boot time. --- lib/libvmmapi/vmmapi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 78ed368..8364dd5 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -111,9 +111,10 @@ vm_destroy(struct vmctx *vm) { assert(vm != NULL); - DESTROY(vm->name); if (vm->fd >= 0) close(vm->fd); + DESTROY(vm->name); + free(vm); } @@ -151,7 +152,6 @@ vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, bzero(&seg, sizeof(seg)); seg.gpa = gpa; error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); - *ret_hpa = seg.hpa; *ret_len = seg.len; return (error); } -- cgit v1.1 From 09939583a708e1199d76573009937c49606d19e5 Mon Sep 17 00:00:00 2001 From: neel Date: Thu, 4 Oct 2012 03:07:05 +0000 Subject: The ioctl VM_GET_MEMORY_SEG is no longer able to return the host physical address associated with the guest memory segment. 
This is because there is no longer a 1:1 mapping between GPA and HPA. As a result 'vmmctl' can only display the guest physical address and the length of the lowmem and highmem segments. --- lib/libvmmapi/vmmapi.c | 3 +-- lib/libvmmapi/vmmapi.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 8364dd5..e083fde 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -143,8 +143,7 @@ vmm_get_mem_free(void) } int -vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, - vm_paddr_t *ret_hpa, size_t *ret_len) +vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len) { int error; struct vm_memory_segment seg; diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index b918409..3f434ab 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -37,8 +37,7 @@ struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); size_t vmm_get_mem_total(void); size_t vmm_get_mem_free(void); -int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, - vm_paddr_t *ret_hpa, size_t *ret_len); +int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len); /* * Create a memory segment of 'len' bytes in the guest physical address space * at offset 'gpa'. -- cgit v1.1 From a62f9562ca8f5ed0f5099962cb864c87a2a2865c Mon Sep 17 00:00:00 2001 From: neel Date: Fri, 12 Oct 2012 17:39:28 +0000 Subject: Add an api to map a vm capability type into a string to be used for display purposes. --- lib/libvmmapi/vmmapi.c | 35 ++++++++++++++++++++++++----------- lib/libvmmapi/vmmapi.h | 1 + 2 files changed, 25 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index e083fde..fdbbbcb 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -370,22 +370,22 @@ vm_inject_nmi(struct vmctx *ctx, int vcpu) return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); } +static struct { + const char *name; + int type; +} capstrmap[] = { + { "hlt_exit", VM_CAP_HALT_EXIT }, + { "mtrap_exit", VM_CAP_MTRAP_EXIT }, + { "pause_exit", VM_CAP_PAUSE_EXIT }, + { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, + { 0 } +}; + int vm_capability_name2type(const char *capname) { int i; - static struct { - const char *name; - int type; - } capstrmap[] = { - { "hlt_exit", VM_CAP_HALT_EXIT }, - { "mtrap_exit", VM_CAP_MTRAP_EXIT }, - { "pause_exit", VM_CAP_PAUSE_EXIT }, - { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, - { 0 } - }; - for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { if (strcmp(capstrmap[i].name, capname) == 0) return (capstrmap[i].type); @@ -394,6 +394,19 @@ vm_capability_name2type(const char *capname) return (-1); } +const char * +vm_capability_type2name(int type) +{ + int i; + + for (i = 0; capstrmap[i].name != NULL; i++) { + if (capstrmap[i].type == type) + return (capstrmap[i].name); + } + + return (NULL); +} + int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 3f434ab..fe5116e 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -70,6 +70,7 @@ int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector); int vm_inject_nmi(struct vmctx *ctx, int vcpu); int vm_capability_name2type(const char *capname); +const char *vm_capability_type2name(int type); int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type 
cap, int *retval); int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, -- cgit v1.1 From 1372a368e0c75317b5fcf37426faaabc995735c9 Mon Sep 17 00:00:00 2001 From: grehan Date: Fri, 26 Oct 2012 13:40:12 +0000 Subject: Remove mptable generation code from libvmmapi and move it to bhyve. Firmware tables require too much knowledge of system configuration, and it's difficult to pass that information in general terms to a library. The upcoming ACPI work exposed this - it will also live in bhyve. Also, remove code specific to NetApp from the mptable name, and remove the -n option from bhyve. Reviewed by: neel Obtained from: NetApp --- lib/libvmmapi/Makefile | 2 +- lib/libvmmapi/mptable.c | 342 ------------------------------------------------ lib/libvmmapi/mptable.h | 170 ------------------------ lib/libvmmapi/vmmapi.c | 13 -- lib/libvmmapi/vmmapi.h | 2 - 5 files changed, 1 insertion(+), 528 deletions(-) delete mode 100644 lib/libvmmapi/mptable.c delete mode 100644 lib/libvmmapi/mptable.h (limited to 'lib') diff --git a/lib/libvmmapi/Makefile b/lib/libvmmapi/Makefile index 82dde08..93d3c85 100644 --- a/lib/libvmmapi/Makefile +++ b/lib/libvmmapi/Makefile @@ -1,7 +1,7 @@ # $FreeBSD$ LIB= vmmapi -SRCS= vmmapi.c vmmapi_freebsd.c mptable.c +SRCS= vmmapi.c vmmapi_freebsd.c INCS= vmmapi.h WARNS?= 2 diff --git a/lib/libvmmapi/mptable.c b/lib/libvmmapi/mptable.c deleted file mode 100644 index 048ea2d..0000000 --- a/lib/libvmmapi/mptable.c +++ /dev/null @@ -1,342 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE.
- * - * $FreeBSD$ - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include - -#include -#include -#include -#include - -#include "vmmapi.h" -#include "mptable.h" - -#define LAPIC_PADDR (0xFEE00000) -#define LAPIC_VERSION (16) - -#define IOAPIC_PADDR (0xFEC00000) -#define IOAPIC_VERSION (0x11) - -extern int errno; - -static uint8_t -mp_compute_checksum(void *base, size_t len) -{ - uint8_t *bytes = base; - uint8_t sum = 0; - for(; len > 0; len--) { - sum += *bytes++; - } - return 256 - sum; -} - -static void -mp_build_mpfp(struct mp_floating_pointer *mpfp, vm_paddr_t mpfp_gpa) -{ - memset(mpfp, 0, sizeof(*mpfp)); - memcpy(mpfp->signature, MPFP_SIGNATURE, MPFP_SIGNATURE_LEN); - mpfp->mptable_paddr = mpfp_gpa + sizeof(*mpfp); - mpfp->specrev = MP_SPECREV; - mpfp->feature2 = 0; - mpfp->checksum = mp_compute_checksum(mpfp, sizeof(*mpfp)); -} - -static void -mp_build_mpch(struct mp_config_hdr *mpch) -{ - memset(mpch, 0, sizeof(*mpch)); - mpch->specrev = MP_SPECREV; - memcpy(mpch->signature, MPCH_SIGNATURE, MPCH_SIGNATURE_LEN); - memcpy(mpch->oemid, MPCH_OEMID, MPCH_OEMID_LEN); - memcpy(mpch->prodid, MPCH_PRODID, MPCH_PRODID_LEN); - mpch->lapic_paddr = LAPIC_PADDR; - - -} - -static void -mp_build_proc_entries(struct mpe_proc *mpep, int num_proc) -{ - int i; - - for (i = 0; i < num_proc; i++) { - memset(mpep, 0, sizeof(*mpep)); - mpep->entry_type = MP_ENTRY_PROC; - mpep->lapic_id = i; // XXX - mpep->lapic_version = LAPIC_VERSION; - mpep->proc_flags = (i == 0)?MPEP_FLAGS_BSP:0; - mpep->proc_flags |= MPEP_FLAGS_EN; - mpep->proc_signature = MPEP_SIGNATURE; - mpep->feature_flags = MPEP_FEATURES; - mpep++; - } - -} - -static void -mp_build_bus_entries(struct mpe_bus *mpeb) -{ - memset(mpeb, 0, sizeof(*mpeb)); - mpeb->entry_type = MP_ENTRY_BUS; - mpeb->busid = MPE_BUSID_ISA; - memcpy(mpeb->busname, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); - mpeb++; - - memset(mpeb, 0, sizeof(*mpeb)); - mpeb->entry_type = MP_ENTRY_BUS; - mpeb->busid = MPE_BUSID_PCI; - memcpy(mpeb->busname, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); - -} - -static void -mp_build_ioapic_entries(struct mpe_ioapic *mpei, int id) -{ - memset(mpei, 0, sizeof(*mpei)); - mpei->entry_type = MP_ENTRY_IOAPIC; - mpei->ioapic_id = id; - mpei->ioapic_version = IOAPIC_VERSION; - mpei->ioapic_flags = MPE_IOAPIC_FLAG_EN; - mpei->ioapic_paddr = IOAPIC_PADDR; -} - -#ifdef notyet -static void -mp_build_ioint_entries(struct mpe_ioint *mpeii, int num_pins, int id) -{ - int pin; - - /* - * The following config is taken from kernel mptable.c - * mptable_parse_default_config_ints(...), for now - * just use the default config, tweek later if needed. - */ - - - /* Run through all 16 pins. */ - for (pin = 0; pin < num_pins; pin++) { - memset(mpeii, 0, sizeof(*mpeii)); - mpeii->entry_type = MP_ENTRY_IOINT; - mpeii->src_bus_id = MPE_BUSID_ISA; - mpeii->dst_apic_id = id; - - /* - * All default configs route IRQs from bus 0 to the first 16 pins - * of the first I/O APIC with an APIC ID of 2. - */ - mpeii->dst_apic_intin = pin; - switch (pin) { - case 0: - /* Pin 0 is an ExtINT pin. */ - mpeii->intr_type = MPEII_INTR_EXTINT; - break; - case 2: - /* IRQ 0 is routed to pin 2. */ - mpeii->intr_type = MPEII_INTR_INT; - mpeii->src_bus_irq = 0; - break; - case 5: - case 10: - case 11: - /* - * PCI Irqs set to level triggered. - */ - mpeii->intr_flags = MPEII_FLAGS_TRIGMODE_LEVEL; - mpeii->src_bus_id = MPE_BUSID_PCI; - default: - /* All other pins are identity mapped. 
*/ - mpeii->intr_type = MPEII_INTR_INT; - mpeii->src_bus_irq = pin; - break; - } - mpeii++; - } - -} - -#define COPYSTR(dest, src, bytes) \ - memcpy(dest, src, bytes); \ - str[bytes] = 0; - - -static void -mptable_dump(struct mp_floating_pointer *mpfp, struct mp_config_hdr *mpch) -{ - static char str[16]; - int i; - char *cur; - - union mpe { - struct mpe_proc *proc; - struct mpe_bus *bus; - struct mpe_ioapic *ioapic; - struct mpe_ioint *ioint; - struct mpe_lint *lnit; - char *p; - }; - - union mpe mpe; - - printf(" MP Floating Pointer :\n"); - COPYSTR(str, mpfp->signature, 4); - printf(" signature: %s\n", str); - printf(" mpch paddr: %x\n", mpfp->mptable_paddr); - printf(" length: %x\n", mpfp->length); - printf(" specrec: %x\n", mpfp->specrev); - printf(" checksum: %x\n", mpfp->checksum); - printf(" feature1: %x\n", mpfp->feature1); - printf(" feature2: %x\n", mpfp->feature2); - printf(" feature3: %x\n", mpfp->feature3); - printf(" feature4: %x\n", mpfp->feature4); - - printf(" MP Configuration Header :\n"); - COPYSTR(str, mpch->signature, 4); - printf(" signature: %s\n", str); - printf(" length: %x\n", mpch->length); - printf(" specrec: %x\n", mpch->specrev); - printf(" checksum: %x\n", mpch->checksum); - COPYSTR(str, mpch->oemid, MPCH_OEMID_LEN); - printf(" oemid: %s\n", str); - COPYSTR(str, mpch->prodid, MPCH_PRODID_LEN); - printf(" prodid: %s\n", str); - printf(" oem_ptr: %x\n", mpch->oem_ptr); - printf(" oem_sz: %x\n", mpch->oem_sz); - printf(" nr_entries: %x\n", mpch->nr_entries); - printf(" apic paddr: %x\n", mpch->lapic_paddr); - printf(" ext_length: %x\n", mpch->ext_length); - printf(" ext_checksum: %x\n", mpch->ext_checksum); - - cur = (char *)mpch + sizeof(*mpch); - for (i = 0; i < mpch->nr_entries; i++) { - mpe.p = cur; - switch(*mpe.p) { - case MP_ENTRY_PROC: - printf(" MP Processor Entry :\n"); - printf(" lapic_id: %x\n", mpe.proc->lapic_id); - printf(" lapic_version: %x\n", mpe.proc->lapic_version); - printf(" proc_flags: %x\n", mpe.proc->proc_flags); - printf(" proc_signature: %x\n", mpe.proc->proc_signature); - printf(" feature_flags: %x\n", mpe.proc->feature_flags); - cur += sizeof(struct mpe_proc); - break; - case MP_ENTRY_BUS: - printf(" MP Bus Entry :\n"); - printf(" busid: %x\n", mpe.bus->busid); - COPYSTR(str, mpe.bus->busname, MPE_BUSNAME_LEN); - printf(" busname: %s\n", str); - cur += sizeof(struct mpe_bus); - break; - case MP_ENTRY_IOAPIC: - printf(" MP IOAPIC Entry :\n"); - printf(" ioapi_id: %x\n", mpe.ioapic->ioapic_id); - printf(" ioapi_version: %x\n", mpe.ioapic->ioapic_version); - printf(" ioapi_flags: %x\n", mpe.ioapic->ioapic_flags); - printf(" ioapi_paddr: %x\n", mpe.ioapic->ioapic_paddr); - cur += sizeof(struct mpe_ioapic); - break; - case MP_ENTRY_IOINT: - printf(" MP IO Interrupt Entry :\n"); - printf(" intr_type: %x\n", mpe.ioint->intr_type); - printf(" intr_flags: %x\n", mpe.ioint->intr_flags); - printf(" src_bus_id: %x\n", mpe.ioint->src_bus_id); - printf(" src_bus_irq: %x\n", mpe.ioint->src_bus_irq); - printf(" dst_apic_id: %x\n", mpe.ioint->dst_apic_id); - printf(" dst_apic_intin: %x\n", mpe.ioint->dst_apic_intin); - cur += sizeof(struct mpe_ioint); - break; - case MP_ENTRY_LINT: - printf(" MP Local Interrupt Entry :\n"); - cur += sizeof(struct mpe_lint); - break; - } - - } -} -#endif - -int -vm_build_mptable(struct vmctx *ctx, vm_paddr_t gpa, int len, int ncpu, - int ioapic, void *oemp, int oemsz) -{ - struct mp_config_hdr *mpch; - char *mapaddr; - char *startaddr; - int error; - - mapaddr = vm_map_memory(ctx, gpa, len); - if (mapaddr == 
MAP_FAILED) { - printf("%s\n", strerror(errno)); - goto err; - } - startaddr = mapaddr; - - mp_build_mpfp((struct mp_floating_pointer*) mapaddr, gpa); - mapaddr += sizeof(struct mp_floating_pointer); - - mpch = (struct mp_config_hdr*)mapaddr; - mp_build_mpch(mpch); - mapaddr += sizeof(struct mp_config_hdr); - - mp_build_proc_entries((struct mpe_proc*) mapaddr, ncpu); - mapaddr += (sizeof(struct mpe_proc)*ncpu); - mpch->nr_entries += ncpu; - - mp_build_bus_entries((struct mpe_bus*)mapaddr); - mapaddr += (sizeof(struct mpe_bus)*MPE_NUM_BUSES); - mpch->nr_entries += MPE_NUM_BUSES; - - if (ioapic) { - mp_build_ioapic_entries((struct mpe_ioapic*)mapaddr, ncpu + 1); - mapaddr += sizeof(struct mpe_ioapic); - mpch->nr_entries++; - } - -#ifdef notyet - mp_build_ioint_entries((struct mpe_ioint*)mapaddr, MPEII_MAX_IRQ, - ncpu + 1); - mapaddr += sizeof(struct mpe_ioint)*MPEII_MAX_IRQ; - mpch->nr_entries += MPEII_MAX_IRQ; - -#endif - if (oemp) { - mpch->oem_ptr = mapaddr - startaddr + gpa; - mpch->oem_sz = oemsz; - memcpy(mapaddr, oemp, oemsz); - } - mpch->length = (mapaddr) - ((char*) mpch); - mpch->checksum = mp_compute_checksum(mpch, sizeof(*mpch)); - - - // mptable_dump((struct mp_floating_pointer*)startaddr, mpch); -err: - return (error); -} diff --git a/lib/libvmmapi/mptable.h b/lib/libvmmapi/mptable.h deleted file mode 100644 index 51c7bc1..0000000 --- a/lib/libvmmapi/mptable.h +++ /dev/null @@ -1,170 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _MPTABLE_h_ -#define _MPTABLE_h_ - -#define MP_SPECREV (4) // MP spec revision 1.1 - -/* - * MP Floating Pointer Structure - */ -#define MPFP_SIGNATURE "_MP_" -#define MPFP_SIGNATURE_LEN (4) -#define MPFP_FEATURE2 (0x80) // IMCR is present -struct mp_floating_pointer { - uint8_t signature[MPFP_SIGNATURE_LEN]; - uint32_t mptable_paddr; - uint8_t length; - uint8_t specrev; - uint8_t checksum; - uint8_t feature1; - uint8_t feature2; - uint8_t feature3; - uint8_t feature4; - uint8_t feature5; -}; - - -/* - * MP Configuration Table Header - */ -#define MPCH_SIGNATURE "PCMP" -#define MPCH_SIGNATURE_LEN (4) - -#define MPCH_OEMID "NETAPP " -#define MPCH_OEMID_LEN (8) -#define MPCH_PRODID "vFiler " -#define MPCH_PRODID_LEN (12) - -struct mp_config_hdr { - uint8_t signature[MPCH_SIGNATURE_LEN]; - uint16_t length; - uint8_t specrev; - uint8_t checksum; - uint8_t oemid[MPCH_OEMID_LEN]; - uint8_t prodid[MPCH_PRODID_LEN]; - uint32_t oem_ptr; - uint16_t oem_sz; - uint16_t nr_entries; - uint32_t lapic_paddr; - uint16_t ext_length; - uint8_t ext_checksum; - uint8_t reserved; -}; - -#define MP_ENTRY_PROC (0) -#define MP_ENTRY_BUS (1) -#define MP_ENTRY_IOAPIC (2) -#define MP_ENTRY_IOINT (3) -#define MP_ENTRY_LINT (4) - -/* - * MP Processor Entry - */ - -#define MPEP_FLAGS_EN (0x1) -#define MPEP_FLAGS_BSP (0x2) - -#define MPEP_SIG_FAMILY (6) -#define MPEP_SIG_MODEL (26) -#define MPEP_SIG_STEPPING (5) -#define MPEP_SIGNATURE ((MPEP_SIG_FAMILY << 8) | (MPEP_SIG_MODEL << 4) \ - | (MPEP_SIG_STEPPING)) - -#define MPEP_FEATURES (0xBFEBFBFF) // Value from Intel i7 CPUID - -struct mpe_proc { - uint8_t entry_type; - uint8_t lapic_id; - uint8_t lapic_version; - uint8_t proc_flags; - uint32_t proc_signature; - uint32_t feature_flags; - uint8_t reserved[8]; -}; - -/* - * MP Bus Entry - */ - -#define MPE_NUM_BUSES (2) -#define MPE_BUSNAME_LEN (6) -#define MPE_BUSID_ISA (0) -#define MPE_BUSID_PCI (1) -#define MPE_BUSNAME_ISA "ISA " -#define MPE_BUSNAME_PCI "PCI " -struct mpe_bus { - uint8_t entry_type; - uint8_t busid; - uint8_t busname[MPE_BUSNAME_LEN]; -}; - -/* - * MP IO APIC Entry - */ -#define MPE_IOAPIC_FLAG_EN (1) -struct mpe_ioapic { - uint8_t entry_type; - uint8_t ioapic_id; - uint8_t ioapic_version; - uint8_t ioapic_flags; - uint32_t ioapic_paddr; - -}; - -/* - * MP IO Interrupt Assignment Entry - */ -#define MPEII_INTR_INT (0) -#define MPEII_INTR_NMI (1) -#define MPEII_INTR_SMI (2) -#define MPEII_INTR_EXTINT (3) -#define MPEII_PCI_IRQ_MASK (0x0c20U) /* IRQ 5,10,11 are PCI connected */ -#define MPEII_MAX_IRQ (16) -#define MPEII_FLAGS_TRIGMODE_LEVEL (0x3) -struct mpe_ioint { - uint8_t entry_type; - uint8_t intr_type; - uint16_t intr_flags; - uint8_t src_bus_id; - uint8_t src_bus_irq; - uint8_t dst_apic_id; - uint8_t dst_apic_intin; -}; - -/* - * MP Local Interrupt Assignment Entry - */ -struct mpe_lint { - uint8_t entry_type; -}; - -int vm_build_mptable(struct vmctx *ctxt, vm_paddr_t gpa, int len, - int ncpu, int ioapic, void *oemp, int oemsz); -#endif /* _MPTABLE_h_ */ diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index fdbbbcb..cfb42d0 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -47,10 +47,6 @@ __FBSDID("$FreeBSD$"); #include #include "vmmapi.h" -#include "mptable.h" - -#define BIOS_ROM_BASE (0xf0000) -#define BIOS_ROM_SIZE (0x10000) struct vmctx { int fd; @@ -329,15 +325,6 @@ vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, } int -vm_build_tables(struct vmctx *ctxt, int ncpu, int ioapic, - void *oemtbl, int oemtblsz) 
-{ - - return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu, - ioapic, oemtbl, oemtblsz)); -} - -int vm_apicid2vcpu(struct vmctx *ctx, int apicid) { /* diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index fe5116e..de04252 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -60,8 +60,6 @@ int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid); int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); -int vm_build_tables(struct vmctx *ctxt, int ncpus, int ioapic, - void *oemtbl, int oemtblsz); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector); -- cgit v1.1 From 9413d826123906120b1317ad76bcc03cada2f98a Mon Sep 17 00:00:00 2001 From: dim Date: Mon, 5 Nov 2012 18:49:21 +0000 Subject: Fix a few warnings from newer clang 3.2 in libpmc, about comparing enum pmc_event values against integer constants which fall outside the enum range. Reviewed by: fabient, sbruno MFC after: 3 days --- lib/libpmc/libpmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 85c460d..4fb2572 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -2264,7 +2264,7 @@ soft_allocate_pmc(enum pmc_event pe, char *ctrspec, (void)ctrspec; (void)pmc_config; - if (pe < PMC_EV_SOFT_FIRST || pe > PMC_EV_SOFT_LAST) + if ((int)pe < PMC_EV_SOFT_FIRST || (int)pe > PMC_EV_SOFT_LAST) return (-1); pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); @@ -3190,7 +3190,7 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) } else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); - } else if (pe >= PMC_EV_SOFT_FIRST && pe <= PMC_EV_SOFT_LAST) { + } else if ((int)pe >= PMC_EV_SOFT_FIRST && (int)pe <= PMC_EV_SOFT_LAST) { ev = soft_event_table; evfence = soft_event_table + soft_event_info.pm_nevent; } -- cgit v1.1 From de6ea8b20e870490db809a8d8a965bd784981d81 Mon Sep 17 00:00:00 2001 From: marcel Date: Wed, 7 Nov 2012 00:23:51 +0000 Subject: Remove -L from LDADD, it doesn't belong there. Add it to LDFLAGS instead. Submitted by: Garrett Cooper --- lib/atf/libatf-c++/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/atf/libatf-c++/Makefile b/lib/atf/libatf-c++/Makefile index 71369e2..37d6073 100644 --- a/lib/atf/libatf-c++/Makefile +++ b/lib/atf/libatf-c++/Makefile @@ -32,7 +32,9 @@ SHLIB_MAJOR= 1 # libatf-c++ depends on the C version of the ATF library to build. DPADD= ${LIBATFC} -LDADD= -L${.OBJDIR}/../libatf-c -latf-c +LDADD= -latf-c + +LDFLAGS+= -L${.OBJDIR}/../libatf-c .PATH: ${ATF} .PATH: ${ATF}/atf-c++ -- cgit v1.1 From 25611f9cf9abb970322ecc6e9eb957d8f47b4819 Mon Sep 17 00:00:00 2001 From: kevlo Date: Wed, 7 Nov 2012 07:00:59 +0000 Subject: Fix typo; s/ouput/output --- lib/msun/src/k_rem_pio2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/msun/src/k_rem_pio2.c b/lib/msun/src/k_rem_pio2.c index a2ffca6..3942441 100644 --- a/lib/msun/src/k_rem_pio2.c +++ b/lib/msun/src/k_rem_pio2.c @@ -45,7 +45,7 @@ __FBSDID("$FreeBSD$"); * z = (z-x[i])*2**24 * * - * y[] ouput result in an array of double precision numbers. + * y[] output result in an array of double precision numbers. 
* The dimension of y[] is: * 24-bit precision 1 * 53-bit precision 2 -- cgit v1.1 From 79b7e755b41a4f7e2a36e4a7c6122f35fba258eb Mon Sep 17 00:00:00 2001 From: sjg Date: Wed, 7 Nov 2012 22:03:59 +0000 Subject: Simple unit-tests for libcrypt, to show how easy it is. Approved by: marcel (mentor) --- lib/libcrypt/tests/Makefile | 10 ++++++++ lib/libcrypt/tests/crypt_tests.c | 54 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 lib/libcrypt/tests/Makefile create mode 100644 lib/libcrypt/tests/crypt_tests.c (limited to 'lib') diff --git a/lib/libcrypt/tests/Makefile b/lib/libcrypt/tests/Makefile new file mode 100644 index 0000000..3190dbe --- /dev/null +++ b/lib/libcrypt/tests/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +# exercise libcrypt + +TESTS_C= crypt_tests + +CFLAGS+= -I${.CURDIR:H} +LDADD+= -L${.OBJDIR:H} -lcrypt + +.include diff --git a/lib/libcrypt/tests/crypt_tests.c b/lib/libcrypt/tests/crypt_tests.c new file mode 100644 index 0000000..3331d12 --- /dev/null +++ b/lib/libcrypt/tests/crypt_tests.c @@ -0,0 +1,54 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +#define LEET "0.s0.l33t" + +ATF_TC(md5); +ATF_TC_HEAD(md5, tc) +{ + + atf_tc_set_md_var(tc, "descr", "Tests the MD5 based password hash"); +} + +ATF_TC_BODY(md5, tc) +{ + const char want[] = "$1$deadbeef$0Huu6KHrKLVWfqa4WljDE0"; + char *pw; + + pw = crypt(LEET, want); + ATF_CHECK_STREQ(pw, want); +} + +ATF_TC(invalid); +ATF_TC_HEAD(invalid, tc) +{ + + atf_tc_set_md_var(tc, "descr", "Tests that invalid password fails"); +} + +ATF_TC_BODY(invalid, tc) +{ + const char want[] = "$1$cafebabe$0Huu6KHrKLVWfqa4WljDE0"; + char *pw; + + pw = crypt(LEET, want); + ATF_CHECK(strcmp(pw, want) != 0); +} + +/* + * This function must not do anything except enumerate + * the test cases, else atf-run is likely to be upset. + */ +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, md5); + ATF_TP_ADD_TC(tp, invalid); + return atf_no_error(); +} -- cgit v1.1 From 99d6a5644c78b3f3504ecbf7be003b7c3e99a62a Mon Sep 17 00:00:00 2001 From: jhibbits Date: Wed, 7 Nov 2012 23:45:09 +0000 Subject: Implement DTrace for PowerPC. This includes both 32-bit and 64-bit. There is one known issue: Some probes will display an error message along the lines of: "Invalid address (0)" I tested this with both a simple dtrace probe and dtruss on a few different binaries on 32-bit. I only compiled 64-bit, did not run it, but I don't expect problems without the modules loaded. Volunteers are welcome. 
MFC after: 1 month --- lib/Makefile | 5 +++++ lib/libproc/proc_bkpt.c | 3 +++ lib/libproc/proc_regs.c | 8 ++++++++ 3 files changed, 16 insertions(+) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 199e08b..7c3ccbd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -207,6 +207,11 @@ _libproc= libproc _librtld_db= librtld_db .endif +.if ${MACHINE_CPUARCH} == "powerpc" +_libproc= libproc +_librtld_db= librtld_db +.endif + .if ${MK_OPENSSL} != "no" _libmp= libmp .endif diff --git a/lib/libproc/proc_bkpt.c b/lib/libproc/proc_bkpt.c index e16b0fc..c15e53c 100644 --- a/lib/libproc/proc_bkpt.c +++ b/lib/libproc/proc_bkpt.c @@ -47,6 +47,9 @@ __FBSDID("$FreeBSD$"); #elif defined(__mips__) #define BREAKPOINT_INSTR 0xd /* break */ #define BREAKPOINT_INSTR_SZ 4 +#elif defined(__powerpc__) +#define BREAKPOINT_INSTR 0x7fe00008 /* trap */ +#define BREAKPOINT_INSTR_SZ 4 #else #error "Add support for your architecture" #endif diff --git a/lib/libproc/proc_regs.c b/lib/libproc/proc_regs.c index c299b9b..aac0125 100644 --- a/lib/libproc/proc_regs.c +++ b/lib/libproc/proc_regs.c @@ -60,6 +60,8 @@ proc_regget(struct proc_handle *phdl, proc_reg_t reg, unsigned long *regvalue) *regvalue = regs.r_eip; #elif defined(__mips__) *regvalue = regs.r_regs[PC]; +#elif defined(__powerpc__) + *regvalue = regs.pc; #endif break; case REG_SP: @@ -69,6 +71,8 @@ proc_regget(struct proc_handle *phdl, proc_reg_t reg, unsigned long *regvalue) *regvalue = regs.r_esp; #elif defined(__mips__) *regvalue = regs.r_regs[SP]; +#elif defined(__powerpc__) + *regvalue = regs.fixreg[1]; #endif break; default: @@ -99,6 +103,8 @@ proc_regset(struct proc_handle *phdl, proc_reg_t reg, unsigned long regvalue) regs.r_eip = regvalue; #elif defined(__mips__) regs.r_regs[PC] = regvalue; +#elif defined(__powerpc__) + regs.pc = regvalue; #endif break; case REG_SP: @@ -108,6 +114,8 @@ proc_regset(struct proc_handle *phdl, proc_reg_t reg, unsigned long regvalue) regs.r_esp = regvalue; #elif defined(__mips__) regs.r_regs[PC] = regvalue; +#elif defined(__powerpc__) + regs.fixreg[1] = regvalue; #endif break; default: -- cgit v1.1 From 8af663c65db48078492379d8dc6a7d55934ea502 Mon Sep 17 00:00:00 2001 From: grog Date: Thu, 8 Nov 2012 02:01:04 +0000 Subject: Clarify that the ' flag is an apostrophe. MFC after: 2 weeks --- lib/libc/stdio/printf.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3 index 0d9339e..05c30dc 100644 --- a/lib/libc/stdio/printf.3 +++ b/lib/libc/stdio/printf.3 @@ -267,7 +267,7 @@ number produced by a signed conversion. A .Cm + overrides a space if both are used. -.It Sq Cm ' +.It So "'" Sc (apostrophe) Decimal conversions .Cm ( d , u , or -- cgit v1.1 From 464808b6f100795216849a2948e345eeaac23fec Mon Sep 17 00:00:00 2001 From: dim Date: Sat, 10 Nov 2012 21:22:10 +0000 Subject: Only define isnan, isnanf, __isnan and __isnanf in libc.so, not in libc.a and libc_p.a. In addition, define isnan in libm.a and libm_p.a, but not in libm.so. This makes it possible to statically link executables using both isnan and isnanf with libc and libm. Tested by: kargl MFC after: 1 week --- lib/libc/gen/isnan.c | 2 ++ lib/msun/src/s_isnan.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/isnan.c b/lib/libc/gen/isnan.c index ec81362..64b0ac1 100644 --- a/lib/libc/gen/isnan.c +++ b/lib/libc/gen/isnan.c @@ -35,6 +35,7 @@ * binary compat until we can bump libm's major version number. 
*/ +#ifdef PIC __weak_reference(__isnan, isnan); __weak_reference(__isnanf, isnanf); @@ -55,3 +56,4 @@ __isnanf(float f) u.f = f; return (u.bits.exp == 255 && u.bits.man != 0); } +#endif /* PIC */ diff --git a/lib/msun/src/s_isnan.c b/lib/msun/src/s_isnan.c index 0f544db..a54ded3 100644 --- a/lib/msun/src/s_isnan.c +++ b/lib/msun/src/s_isnan.c @@ -30,8 +30,9 @@ #include "fpmath.h" -/* Provided by libc */ -#if 0 +/* Provided by libc.so */ +#ifndef PIC +#undef isnan int isnan(double d) { @@ -40,7 +41,7 @@ isnan(double d) u.d = d; return (u.bits.exp == 2047 && (u.bits.manl != 0 || u.bits.manh != 0)); } -#endif +#endif /* !PIC */ int __isnanf(float f) -- cgit v1.1 From 551d372a1bddcd79984b86718d69d54076349901 Mon Sep 17 00:00:00 2001 From: dim Date: Sun, 11 Nov 2012 13:28:04 +0000 Subject: Add an explanatory comment to lib/libc/gen/isnan.c about the fix to make static linking with libc and libm work. Requested by: jilles MFC after: 1 week X-MFC-With: 242879 --- lib/libc/gen/isnan.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/libc/gen/isnan.c b/lib/libc/gen/isnan.c index 64b0ac1..72c2868 100644 --- a/lib/libc/gen/isnan.c +++ b/lib/libc/gen/isnan.c @@ -33,6 +33,11 @@ /* * XXX These routines belong in libm, but they must remain in libc for * binary compat until we can bump libm's major version number. + * + * Note this only applies to the dynamic versions of libm and libc, so + * for the static and profiled versions we stub out the definitions. + * Otherwise you cannot link statically to libm and libc at the same + * time, when calling both functions. */ #ifdef PIC -- cgit v1.1 From 37c97ba01ba0cd42b9334d786dcadbf99cbb2456 Mon Sep 17 00:00:00 2001 From: kib Date: Tue, 13 Nov 2012 12:55:52 +0000 Subject: Implement the waitid() SUSv4 function using wait6() system call. PR: standards/170346 Submitted by: "Jukka A. Ukkonen" MFC after: 1 month --- lib/libc/gen/Makefile.inc | 2 +- lib/libc/gen/Symbol.map | 1 + lib/libc/gen/waitid.c | 65 +++++++++++++++++++++++++++++++++++++++++ lib/libc/include/namespace.h | 1 + lib/libc/include/un-namespace.h | 1 + lib/libc/sys/Symbol.map | 3 ++ 6 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 lib/libc/gen/waitid.c (limited to 'lib') diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 2481f28..906f4ce 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -34,7 +34,7 @@ SRCS+= __getosreldate.c __xuname.c \ syslog.c telldir.c termios.c time.c times.c timezone.c tls.c \ ttyname.c ttyslot.c ualarm.c ulimit.c uname.c unvis.c \ usleep.c utime.c utxdb.c valloc.c vis.c wait.c wait3.c waitpid.c \ - wordexp.c + waitid.c wordexp.c .PATH: ${.CURDIR}/../../contrib/libc-pwcache SRCS+= pwcache.c pwcache.h diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index b4f88ae..f5d4f71 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -391,6 +391,7 @@ FBSD_1.3 { pwcache_userdb; pwcache_groupdb; uid_from_user; + waitid; }; FBSDprivate_1.0 { diff --git a/lib/libc/gen/waitid.c b/lib/libc/gen/waitid.c new file mode 100644 index 0000000..795b208 --- /dev/null +++ b/lib/libc/gen/waitid.c @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2012 Jukka A. Ukkonen + * All rights reserved. + * + * This software was developed by Jukka Ukkonen for FreeBSD. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include +#include +#include +#include "un-namespace.h" + +int +__waitid(idtype_t idtype, id_t id, siginfo_t *info, int flags) +{ + int status; + pid_t ret; + + ret = _wait6(idtype, id, &status, flags, NULL, info); + + /* + * According to SUSv4, waitid() shall not return a PID when a + * process is found, but only 0. If a process was actually + * found, siginfo_t fields si_signo and si_pid will be + * non-zero. In case WNOHANG was set in the flags and no + * process was found those fields are set to zero using + * memset() below. + */ + if (ret == 0 && info != NULL) + memset(info, 0, sizeof(*info)); + else if (ret > 0) + ret = 0; + return (ret); +} + +__weak_reference(__waitid, waitid); +__weak_reference(__waitid, _waitid); diff --git a/lib/libc/include/namespace.h b/lib/libc/include/namespace.h index 1e00030..739d7b1 100644 --- a/lib/libc/include/namespace.h +++ b/lib/libc/include/namespace.h @@ -229,6 +229,7 @@ #define socketpair _socketpair #define usleep _usleep #define wait4 _wait4 +#define wait6 _wait6 #define waitpid _waitpid #define write _write #define writev _writev diff --git a/lib/libc/include/un-namespace.h b/lib/libc/include/un-namespace.h index 9c9ce97..f31fa7a 100644 --- a/lib/libc/include/un-namespace.h +++ b/lib/libc/include/un-namespace.h @@ -210,6 +210,7 @@ #undef socketpair #undef usleep #undef wait4 +#undef wait6 #undef waitpid #undef write #undef writev diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 008b8da..babae30 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -384,6 +384,7 @@ FBSD_1.3 { ffclock_getestimate; ffclock_setestimate; posix_fadvise; + wait6; }; FBSDprivate_1.0 { @@ -1019,6 +1020,8 @@ FBSDprivate_1.0 { __sys_vadvise; _wait4; __sys_wait4; + _wait6; + __sys_wait6; _write; __sys_write; _writev; -- cgit v1.1 From 39bfcc5eed2a3f48c8f7e92b61173fa2f7edb3a9 Mon Sep 17 00:00:00 2001 From: kib Date: Tue, 13 Nov 2012 12:56:42 +0000 Subject: Document wait6() and waitid(). PR: standards/170346 Submitted by: "Jukka A. 
Ukkonen" MFC after: 1 month --- lib/libc/sys/Makefile.inc | 3 +- lib/libc/sys/wait.2 | 311 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 282 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index df4ef42..a35ed5d 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -217,5 +217,6 @@ MLINKS+=timer_settime.2 timer_getoverrun.2 timer_settime.2 timer_gettime.2 MLINKS+=truncate.2 ftruncate.2 MLINKS+=unlink.2 unlinkat.2 MLINKS+=utimes.2 futimes.2 utimes.2 futimesat.2 utimes.2 lutimes.2 -MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2 +MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2 \ + wait.2 waitid.2 wait.2 wait6.2 MLINKS+=write.2 pwrite.2 write.2 pwritev.2 write.2 writev.2 diff --git a/lib/libc/sys/wait.2 b/lib/libc/sys/wait.2 index 71bd529..0c494bb 100644 --- a/lib/libc/sys/wait.2 +++ b/lib/libc/sys/wait.2 @@ -28,15 +28,17 @@ .\" @(#)wait.2 8.2 (Berkeley) 4/19/94 .\" $FreeBSD$ .\" -.Dd November 12, 2005 +.Dd November 10, 2012 .Dt WAIT 2 .Os .Sh NAME .Nm wait , +.Nm waitid , .Nm waitpid , +.Nm wait3 , .Nm wait4 , -.Nm wait3 -.Nd wait for process termination +.Nm wait6 +.Nd wait for processes to change status .Sh LIBRARY .Lb libc .Sh SYNOPSIS @@ -46,12 +48,17 @@ .Fn wait "int *status" .Ft pid_t .Fn waitpid "pid_t wpid" "int *status" "int options" +.In sys/signal.h +.Ft int +.Fn waitid "idtype_t idtype" "id_t id" "siginfo_t *info" "int options" .In sys/time.h .In sys/resource.h .Ft pid_t .Fn wait3 "int *status" "int options" "struct rusage *rusage" .Ft pid_t .Fn wait4 "pid_t wpid" "int *status" "int options" "struct rusage *rusage" +.Ft pid_t +.Fn wait6 "idtype_t idtype" "id_t id" "int *status" "int options" "struct __wrusage *wrusage" "siginfo_t *infop" .Sh DESCRIPTION The .Fn wait @@ -86,28 +93,172 @@ system call provides a more general interface for programs that need to wait for certain child processes, that need resource utilization statistics accumulated by child processes, or that require options. -The other wait functions are implemented using -.Fn wait4 . .Pp +The broadest interface of all functions in this family is +.Fn wait6 +which is otherwise very much like +.Fn wait4 +but with a few very important distinctions. +To wait for exited processes, the option flag +.Dv WEXITED +need to be explicitly specified. +This allows for waiting for processes which have experienced other +status changes without having to handle also the exit status from +the terminated processes. +Instead of the traditional +.Dv rusage +argument, a pointer to a new structure +.Bd -literal +struct __wrusage { + struct rusage wru_self; + struct rusage wru_children; +}; +.Ed +can be passed. +This allows the calling process to collect resource usage statistics +from both its own child process as well as from its grand children. +When no resource usage statistics are needed this pointer can be +.Dv NULL . +The last argument +.Fa infop +must be either +.Dv NULL +or a pointer to a +.Fa siginfo_t +structure. +When specified, the structure is filled the same as for +.Dv SIGNCHLD +signal, delivered at the process state change. +.br +The process, which state is queried, is specified by two arguments +.Fa idtype +and +.Fa id . +The separate +.Fa idtype +and +.Fa id +arguments allows to support many other types of +IDs as well in addition to PID and PGID. 
+.Bl -bullet -offset indent +.It +If +.Fa idtype +is +.Dv P_PID , +.Fn waitid +and +.Fn wait6 +wait for the child process with a process ID equal to +.Dv (pid_t)id . +.It +If +.Fa idtype +is +.Dv P_PGID , +.Fn waitid +and +.Fn wait6 +wait for the child process with a process group ID equal to +.Dv (pid_t)id . +.It +If +.Fa idtype +is +.Dv P_ALL , +.Fn waitid +and +.Fn wait6 +wait for any child process and the +.Dv id +is ignored. +.It +If +.Fa idtype +is +.Dv P_PID +or +.Dv P_PGID +and the +.Dv id +is zero, +.Fn waitid +and +.Fn wait6 +wait for any child process in the same process group as the caller. +.El +.Pp +Non-standard specifiers for the process to wait for, supported by this +implementation of +.Fn waitid +and +.Fn wait6 , +are: +.Bl -bullet -offset indent +.It +The +.Fa idtype +value +.Dv P_UID +waits for processes whose effective UID is equal to +.Dv (uid_t)id . +.It +The +.Fa idtype +value +.Dv P_GID +waits for processes whose effective GID is equal to +.Dv (gid_t)id . +.It +The +.Fa idtype +value +.Dv P_SID +waits for processes whose session ID is equal to +.Dv id . +In case the child process started its own session, +its SID will be the same as its PID. +Otherwise the SID of a child process will match the caller's SID. +.It +The +.Fa idtype +value +.Dv P_JAILID +waits for processes within a jail whose jail identifier is equal +to +.Dv id . +.El +.Pp +For the +.Fn wait , +.Fn wait3 , +and +.Fn wait4 +functions, the single .Fa wpid argument specifies the set of child processes for which to wait. +.Bl -bullet -offset indent +.It If .Fa wpid is -1, the call waits for any child process. +.It If .Fa wpid is 0, the call waits for any child process in the process group of the caller. +.It If .Fa wpid is greater than zero, the call waits for the process with process id .Fa wpid . +.It If .Fa wpid is less than -1, the call waits for any process whose process group id equals the absolute value of .Fa wpid . +.El .Pp The .Fa status @@ -116,41 +267,102 @@ argument is defined below. The .Fa options argument contains the bitwise OR of any of the following options. -The -.Dv WCONTINUED -option indicates that children of the current process that +.Bl -tag -width Ds +.It Dv WCONTINUED +indicates that children of the current process that have continued from a job control stop, by receiving a .Dv SIGCONT signal, should also have their status reported. -The -.Dv WNOHANG -option -is used to indicate that the call should not block if -there are no processes that wish to report status. +.It Dv WNOHANG +is used to indicate that the call should not block when +there are no processes wishing to report status. -If the -.Dv WUNTRACED -option is set, -children of the current process that are stopped +.It Dv WUNTRACED +indicates that children of the current process which are stopped due to a .Dv SIGTTIN , SIGTTOU , SIGTSTP , or .Dv SIGSTOP -signal also have their status reported. +signal shall have their status reported. -The -.Dv WSTOPPED -option is an alias for +.It Dv WSTOPPED +is an alias for .Dv WUNTRACED . +.It Dv WTRAPPED +allows waiting for processes which have trapped or reached a breakpoint. +.It Dv WEXITED +indicates that the caller wants to receive status reports from +terminated processes. +This flag is implicitly set for the functions +.Fn wait , +.Fn waitpid , +.Fn wait3 , +and +.Fn wait4 .
+.br +For the +.Fn waitid +and +.Fn wait6 +functions, the flag has to be explicitly included in the +.Fa options , +if status reports from terminated processes are expected. +.It Dv WNOWAIT +keeps the process whose status is returned in a waitable state. The process may be waited for again after this call completes. +.El +.sp +For the +.Fn waitid +and +.Fn wait6 +functions, at least one of the options +.Dv WEXITED , +.Dv WUNTRACED , +.Dv WSTOPPED , +.Dv WTRAPPED , +or +.Dv WCONTINUED +must be specified. +Otherwise there will be no events for the call to report. +To avoid hanging indefinitely in such a case, these functions +return -1 with +.Dv errno +set to +.Dv EINVAL . .Pp If .Fa rusage -is non-zero, a summary of the resources used by the terminated -process and all its -children is returned (this information is currently not available -for stopped or continued processes). +is non-NULL, a summary of the resources used by the terminated +process and all its children is returned. +.Pp +If the +.Fa wrusage +argument is non-NULL, resource usage statistics +for both the child process and its grandchildren +are returned. +.Pp +If +.Fa infop +is non-NULL, it must point to a +.Dv siginfo_t +structure which is filled on return such that the +.Dv si_signo +field is always +.Dv SIGCHLD +and the field +.Dv si_pid +will be non-zero if there is a status change to report. +If there are no status changes to report and WNOHANG is applied, +both of these fields are returned as zero. +When using the +.Fn waitid +function with the +.Dv WNOHANG +option set, checking these fields is the only way to know whether +there were any status changes to report, because the return value +from +.Fn waitid +will be zero, as it is for any successful return from +.Fn waitid . .Pp When the .Dv WNOHANG @@ -175,6 +387,20 @@ call is the same as with a .Fa wpid value of -1. +The +.Fn wait6 +call, with the bits +.Dv WEXITED +and +.Dv WTRAPPED +set in the +.Fa options +and with +.Fa infop +set to +.Dv NULL , +is similar to +.Fn wait4 . .Pp The following macros may be used to test the manner of exit of the process. One of the first four macros will evaluate to a non-zero (true) value: @@ -284,6 +510,7 @@ is returned and is set to indicate the error. .Pp If +.Fn wait6 , .Fn wait4 , .Fn wait3 , or @@ -306,6 +533,18 @@ a value of -1 is returned and .Va errno is set to indicate the error. +.Pp +If +.Fn waitid +returns because one or more processes have a state change to report, +0 is returned. +To indicate an error, -1 will be returned and +.Dv errno +set to an appropriate value. +If +.Dv WNOHANG +was used, 0 can be returned indicating no error, even though no +process may have changed state; in that case si_signo and si_pid are zero. .Sh ERRORS The .Fn wait @@ -335,6 +574,14 @@ The call was interrupted by a caught signal, or the signal did not have the .Dv SA_RESTART flag set. +.It Bq Er EINVAL +An invalid value was specified for +.Fa options , +or +.Fa idtype +and +.Fa id +do not specify a valid set of processes. .El .Sh SEE ALSO .Xr _exit 2 , .Xr sigaction 2 , .Xr siginfo 3 .Sh STANDARDS The -.Fn wait +.Fn wait , +.Fn waitpid , and -.Fn waitpid +.Fn waitid functions are defined by POSIX; -.Fn wait4 +.Fn wait6 , +.Fn wait4 , and .Fn wait3 are not specified by POSIX. -- cgit v1.1 From 0ebb6520078c6cde55a20b445228f085b52b4857 Mon Sep 17 00:00:00 2001 From: kevlo Date: Wed, 14 Nov 2012 01:45:10 +0000 Subject: Document that sendfile(2) can fail with ENOBUFS.
Reviewed by:	glebius
---
 lib/libc/sys/sendfile.2 | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/libc/sys/sendfile.2 b/lib/libc/sys/sendfile.2
index 9d1fba5..0e7dbe3 100644
--- a/lib/libc/sys/sendfile.2
+++ b/lib/libc/sys/sendfile.2
@@ -265,6 +265,8 @@ is negative.
 .It Bq Er EIO
 An error occurred while reading from
 .Fa fd .
+.It Bq Er ENOBUFS
+The system was unable to allocate an internal buffer.
 .It Bq Er ENOTCONN
 The
 .Fa s
--
cgit v1.1


From ff16940b26eab3cfb42489dc6e1c176415175b38 Mon Sep 17 00:00:00 2001
From: grog
Date: Fri, 16 Nov 2012 01:41:42 +0000
Subject: Complete man page.

MFC after:	2 weeks
---
 lib/libc/gen/getbsize.3 | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/gen/getbsize.3 b/lib/libc/gen/getbsize.3
index 1407051..5a759da 100644
--- a/lib/libc/gen/getbsize.3
+++ b/lib/libc/gen/getbsize.3
@@ -28,12 +28,12 @@
 .\" @(#)getbsize.3	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd June 4, 1993
+.Dd November 16, 2012
 .Dt GETBSIZE 3
 .Os
 .Sh NAME
 .Nm getbsize
-.Nd get user block size
+.Nd get preferred block size
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
@@ -43,11 +43,31 @@
 .Sh DESCRIPTION
 The
 .Fn getbsize
-function determines the user's preferred block size based on the value of the
-.Dq BLOCKSIZE
-environment variable; see
-.Xr environ 7
-for details on its use and format.
+function returns a preferred block size for reporting by system utilities
+.Xr df 1 ,
+.Xr du 1 ,
+.Xr ls 1 ,
+and
+.Xr systat 1 ,
+based on the value of the
+.En BLOCKSIZE
+environment variable.
+.En BLOCKSIZE
+may be specified directly in bytes, or in multiples of a kilobyte by
+specifying a number followed by ``K'' or ``k'', in multiples of a
+megabyte by specifying a number followed by ``M'' or ``m'', or in
+multiples of a gigabyte by specifying a number followed by ``G'' or
+``g''.
+Multiples must be integers.
+.Pp
+Valid values of
+.En BLOCKSIZE
+are 512 bytes to 1 gigabyte.
+Sizes less than 512 bytes are rounded up to 512 bytes, and sizes
+greater than 1 GB are rounded down to 1 GB.
+In each case
+.Fn getbsize
+produces a warning message.
 .Pp
 The
 .Fn getbsize
@@ -61,10 +81,6 @@ terminating null).
 The memory referenced by
 .Fa blocksizep
 is filled in with block size, in bytes.
-.Pp
-If the user's block size is unreasonable, a warning message is
-written to standard error and the returned information reflects
-a block size of 512 bytes.
 .Sh SEE ALSO
 .Xr df 1 ,
 .Xr du 1 ,
--
cgit v1.1


From 5c97761dcf450dbea3ccd3e3706c3f843d1ac27f Mon Sep 17 00:00:00 2001
From: kevlo
Date: Fri, 16 Nov 2012 09:56:25 +0000
Subject: Document that rtprio(2) and rtprio_thread(2) can fail with EFAULT
 due to a failed copyout(9).

Reviewed by:	davidxu
---
 lib/libc/sys/rtprio.2 | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/sys/rtprio.2 b/lib/libc/sys/rtprio.2
index ac78b50..f30ed22 100644
--- a/lib/libc/sys/rtprio.2
+++ b/lib/libc/sys/rtprio.2
@@ -145,13 +145,21 @@ Higher real/idle priority threads preempt lower real/idle priority
 threads.
 Threads of equal real/idle priority are run round-robin.
 .Sh RETURN VALUES
-.Rv -std rtprio
+.Rv -std rtprio rtprio_thread
 .Sh ERRORS
 The
 .Fn rtprio
-system call
+and
+.Fn rtprio_thread
+system calls
 will fail if:
 .Bl -tag -width Er
+.It Bq Er EFAULT
+The rtp pointer passed to
+.Fn rtprio
+or
+.Fn rtprio_thread
+was invalid.
.It Bq Er EINVAL The specified .Fa prio -- cgit v1.1 From 3421e21beeae4f238f92e3d9e59b6e63fe8bf971 Mon Sep 17 00:00:00 2001 From: joel Date: Fri, 16 Nov 2012 12:03:50 +0000 Subject: mdoc: Use the Ev macro for environmental variables. --- lib/libc/gen/getbsize.3 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/getbsize.3 b/lib/libc/gen/getbsize.3 index 5a759da..2caf5fe 100644 --- a/lib/libc/gen/getbsize.3 +++ b/lib/libc/gen/getbsize.3 @@ -50,9 +50,9 @@ function returns a preferred block size for reporting by system utilities and .Xr systat 1 , based on the value of the -.En BLOCKSIZE +.Ev BLOCKSIZE environment variable. -.En BLOCKSIZE +.Ev BLOCKSIZE may be specified directly in bytes, or in multiples of a kilobyte by specifying a number followed by ``K'' or ``k'', in multiples of a megabyte by specifying a number followed by ``M'' or ``m'' or in @@ -61,7 +61,7 @@ multiples of a gigabyte by specifying a number followed by ``G'' or Multiples must be integers. .Pp Valid values of -.En BLOCKSIZE +.Ev BLOCKSIZE are 512 bytes to 1 gigabyte. Sizes less than 512 bytes are rounded up to 512 bytes, and sizes greater than 1 GB are rounded down to 1 GB. -- cgit v1.1 From f81cc6662509ddec4578b29f294375340c8ae404 Mon Sep 17 00:00:00 2001 From: des Date: Fri, 16 Nov 2012 12:31:43 +0000 Subject: Fix weird indentation. --- lib/libfetch/http.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libfetch/http.c b/lib/libfetch/http.c index 00dd887..abf79a3 100644 --- a/lib/libfetch/http.c +++ b/lib/libfetch/http.c @@ -1752,11 +1752,11 @@ http_request(struct url *URL, const char *op, struct url_stat *us, /* get headers. http_next_header expects one line readahead */ if (fetch_getln(conn) == -1) { - fetch_syserr(); - goto ouch; + fetch_syserr(); + goto ouch; } do { - switch ((h = http_next_header(conn, &headerbuf, &p))) { + switch ((h = http_next_header(conn, &headerbuf, &p))) { case hdr_syserror: fetch_syserr(); goto ouch; @@ -1785,7 +1785,7 @@ http_request(struct url *URL, const char *op, struct url_stat *us, conn->err != HTTP_USE_PROXY) { n = 1; break; - } + } if (new) free(new); if (verbose) -- cgit v1.1 From f2c2ff1baf52aee84f7261dd1c3b8687b02c1a66 Mon Sep 17 00:00:00 2001 From: kevlo Date: Fri, 16 Nov 2012 15:02:35 +0000 Subject: - the preferred way to write a NULL pointer constant is with NULL - whitespace nit Reviewed by: glebius --- lib/libc/net/getnetent.3 | 5 ++--- lib/libc/net/getprotoent.3 | 5 ++--- lib/libc/net/getservent.3 | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/getnetent.3 b/lib/libc/net/getnetent.3 index 2e9cd33..ec08b6b 100644 --- a/lib/libc/net/getnetent.3 +++ b/lib/libc/net/getnetent.3 @@ -73,7 +73,7 @@ The order of the lookups is controlled by the `networks' entry in .Xr nsswitch.conf 5 . .Bd -literal -offset indent -struct netent { +struct netent { char *n_name; /* official name of net */ char **n_aliases; /* alias list */ int n_addrtype; /* net number type */ @@ -142,8 +142,7 @@ Network numbers are supplied in host order. .It Pa /etc/resolv.conf .El .Sh DIAGNOSTICS -Null pointer -(0) returned on +Null pointer returned on .Dv EOF or error. .Sh SEE ALSO diff --git a/lib/libc/net/getprotoent.3 b/lib/libc/net/getprotoent.3 index 9f9e00d..565e038 100644 --- a/lib/libc/net/getprotoent.3 +++ b/lib/libc/net/getprotoent.3 @@ -65,7 +65,7 @@ containing the broken-out fields of a line in the network protocol data base, .Pa /etc/protocols . 
.Bd -literal -offset indent -struct protoent { +struct protoent { char *p_name; /* official name of protocol */ char **p_aliases; /* alias list */ int p_proto; /* protocol number */ @@ -117,8 +117,7 @@ or until .Dv EOF is encountered. .Sh RETURN VALUES -Null pointer -(0) returned on +Null pointer returned on .Dv EOF or error. .Sh FILES diff --git a/lib/libc/net/getservent.3 b/lib/libc/net/getservent.3 index 65d40bb..5f5a452 100644 --- a/lib/libc/net/getservent.3 +++ b/lib/libc/net/getservent.3 @@ -65,7 +65,7 @@ containing the broken-out fields of a line in the network services data base, .Pa /etc/services . .Bd -literal -offset indent -struct servent { +struct servent { char *s_name; /* official name of service */ char **s_aliases; /* alias list */ int s_port; /* port service resides at */ @@ -130,8 +130,7 @@ searches must also match the protocol. .It Pa /etc/services .El .Sh DIAGNOSTICS -Null pointer -(0) returned on +Null pointer returned on .Dv EOF or error. .Sh SEE ALSO -- cgit v1.1 From 55b42c566aa34fe926dd40004d1a8f3ad15ed7dd Mon Sep 17 00:00:00 2001 From: tuexen Date: Mon, 19 Nov 2012 19:19:04 +0000 Subject: Fix the handling of mapped IPv6 addresses in sctp_connectx(). MFC after: 3 days --- lib/libc/net/sctp_sys_calls.c | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/net/sctp_sys_calls.c b/lib/libc/net/sctp_sys_calls.c index 1e06f44..031cfd3 100644 --- a/lib/libc/net/sctp_sys_calls.c +++ b/lib/libc/net/sctp_sys_calls.c @@ -202,7 +202,6 @@ sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt, return (-1); } if (IN6_IS_ADDR_V4MAPPED(&((struct sockaddr_in6 *)at)->sin6_addr)) { - len += sizeof(struct sockaddr_in); in6_sin6_2_sin((struct sockaddr_in *)cpto, (struct sockaddr_in6 *)at); cpto = ((caddr_t)cpto + sizeof(struct sockaddr_in)); len += sizeof(struct sockaddr_in); -- cgit v1.1 From debae4f153b89e64c1376cb9fdb14cb6fb70fefc Mon Sep 17 00:00:00 2001 From: tuexen Date: Mon, 19 Nov 2012 19:26:19 +0000 Subject: Cleanup the code a bit, which improves the portability. 
MFC after: 1 week --- lib/libc/net/sctp_sys_calls.c | 85 +++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/sctp_sys_calls.c b/lib/libc/net/sctp_sys_calls.c index 031cfd3..beedf94 100644 --- a/lib/libc/net/sctp_sys_calls.c +++ b/lib/libc/net/sctp_sys_calls.c @@ -188,15 +188,18 @@ sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt, cpto = ((caddr_t)buf + sizeof(int)); /* validate all the addresses and get the size */ for (i = 0; i < addrcnt; i++) { - if (at->sa_family == AF_INET) { + switch (at->sa_family) { + case AF_INET: if (at->sa_len != sizeof(struct sockaddr_in)) { errno = EINVAL; return (-1); } - memcpy(cpto, at, at->sa_len); - cpto = ((caddr_t)cpto + at->sa_len); - len += at->sa_len; - } else if (at->sa_family == AF_INET6) { + memcpy(cpto, at, sizeof(struct sockaddr_in)); + cpto = ((caddr_t)cpto + sizeof(struct sockaddr_in)); + len += sizeof(struct sockaddr_in); + at = (struct sockaddr *)((caddr_t)at + sizeof(struct sockaddr_in)); + break; + case AF_INET6: if (at->sa_len != sizeof(struct sockaddr_in6)) { errno = EINVAL; return (-1); @@ -206,11 +209,13 @@ sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt, cpto = ((caddr_t)cpto + sizeof(struct sockaddr_in)); len += sizeof(struct sockaddr_in); } else { - memcpy(cpto, at, at->sa_len); - cpto = ((caddr_t)cpto + at->sa_len); - len += at->sa_len; + memcpy(cpto, at, sizeof(struct sockaddr_in6)); + cpto = ((caddr_t)cpto + sizeof(struct sockaddr_in6)); + len += sizeof(struct sockaddr_in6); } - } else { + at = (struct sockaddr *)((caddr_t)at + sizeof(struct sockaddr_in6)); + break; + default: errno = EINVAL; return (-1); } @@ -219,7 +224,6 @@ sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt, errno = E2BIG; return (-1); } - at = (struct sockaddr *)((caddr_t)at + at->sa_len); cnt++; } /* do we have any? */ @@ -260,56 +264,57 @@ sctp_bindx(int sd, struct sockaddr *addrs, int addrcnt, int flags) errno = EINVAL; return (-1); } - argsz = (sizeof(struct sockaddr_storage) + - sizeof(struct sctp_getaddresses)); - gaddrs = (struct sctp_getaddresses *)calloc(1, argsz); - if (gaddrs == NULL) { - errno = ENOMEM; - return (-1); - } /* First pre-screen the addresses */ sa = addrs; for (i = 0; i < addrcnt; i++) { - if (sa->sa_family == AF_INET) { - if (sa->sa_len != sizeof(struct sockaddr_in)) - goto out_error; + switch (sa->sa_family) { + case AF_INET: + if (sa->sa_len != sizeof(struct sockaddr_in)) { + errno = EINVAL; + return (-1); + } sin = (struct sockaddr_in *)sa; if (sin->sin_port) { /* non-zero port, check or save */ if (sport) { /* Check against our port */ if (sport != sin->sin_port) { - goto out_error; + errno = EINVAL; + return (-1); } } else { /* save off the port */ sport = sin->sin_port; } } - } else if (sa->sa_family == AF_INET6) { - if (sa->sa_len != sizeof(struct sockaddr_in6)) - goto out_error; + break; + case AF_INET6: + if (sa->sa_len != sizeof(struct sockaddr_in6)) { + errno = EINVAL; + return (-1); + } sin6 = (struct sockaddr_in6 *)sa; if (sin6->sin6_port) { /* non-zero port, check or save */ if (sport) { /* Check against our port */ if (sport != sin6->sin6_port) { - goto out_error; + errno = EINVAL; + return (-1); } } else { /* save off the port */ sport = sin6->sin6_port; } } - } else { - /* invalid address family specified */ - goto out_error; + break; + default: + /* Invalid address family specified. 
*/
+			errno = EINVAL;
+			return (-1);
 		}
-		sa = (struct sockaddr *)((caddr_t)sa + sa->sa_len);
 	}
-	sa = addrs;
 	/*
 	 * Now if there was a port mentioned, assure that the first address
 	 * has that port to make sure it fails or succeeds correctly.
@@ -318,20 +323,14 @@ sctp_bindx(int sd, struct sockaddr *addrs, int addrcnt, int flags)
 		sin = (struct sockaddr_in *)sa;
 		sin->sin_port = sport;
 	}
+	argsz = sizeof(struct sctp_getaddresses) +
+	    sizeof(struct sockaddr_storage);
+	if ((gaddrs = (struct sctp_getaddresses *)malloc(argsz)) == NULL) {
+		errno = ENOMEM;
+		return (-1);
+	}
+	sa = addrs;
 	for (i = 0; i < addrcnt; i++) {
-		if (sa->sa_family == AF_INET) {
-			if (sa->sa_len != sizeof(struct sockaddr_in))
-				goto out_error;
-		} else if (sa->sa_family == AF_INET6) {
-			if (sa->sa_len != sizeof(struct sockaddr_in6))
-				goto out_error;
-		} else {
-			/* invalid address family specified */
-	out_error:
-			free(gaddrs);
-			errno = EINVAL;
-			return (-1);
-		}
 		memset(gaddrs, 0, argsz);
 		gaddrs->sget_assoc_id = 0;
 		memcpy(gaddrs->addr, sa, sa->sa_len);
--
cgit v1.1


From 9acb3e40c2b5badc282bac6ad0196dbbb3f443c7 Mon Sep 17 00:00:00 2001
From: bapt
Date: Tue, 20 Nov 2012 07:22:07 +0000
Subject: change the mode of the group file to 0644 after a successful
 rename(2)

---
 lib/libutil/gr_util.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libutil/gr_util.c b/lib/libutil/gr_util.c
index 6d96d5e..8d0490b 100644
--- a/lib/libutil/gr_util.c
+++ b/lib/libutil/gr_util.c
@@ -318,7 +318,14 @@ gr_copy(int ffd, int tfd, const struct group *gr, struct group *old_gr)
 int
 gr_mkdb(void)
 {
-	return (rename(tempname, group_file));
+	int ret;
+
+	ret = rename(tempname, group_file);
+
+	if (ret == 0)
+		chmod(group_file, 0644);
+
+	return (ret);
 }
 
 /*
--
cgit v1.1


From 2ed31d29a136b086906cadaedceedd82398b9e7b Mon Sep 17 00:00:00 2001
From: bapt
Date: Tue, 20 Nov 2012 14:03:09 +0000
Subject: only rename(2) after chmod(2) has succeeded

report an error if chmod(2) fails

Reported by:	jh
---
 lib/libutil/gr_util.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/libutil/gr_util.c b/lib/libutil/gr_util.c
index 8d0490b..be34395 100644
--- a/lib/libutil/gr_util.c
+++ b/lib/libutil/gr_util.c
@@ -318,14 +318,10 @@ gr_copy(int ffd, int tfd, const struct group *gr, struct group *old_gr)
 int
 gr_mkdb(void)
 {
-	int ret;
-
-	ret = rename(tempname, group_file);
-
-	if (ret == 0)
-		chmod(group_file, 0644);
+	if (chmod(tempname, 0644) != 0)
+		return (-1);
 
-	return (ret);
+	return (rename(tempname, group_file));
 }
 
 /*
--
cgit v1.1


From ec5aeeffd06a344918e75a6621a4dd36f5b1ccc5 Mon Sep 17 00:00:00 2001
From: kevlo
Date: Fri, 23 Nov 2012 10:14:54 +0000
Subject: Document that getpeername(2) and getsockname(2) can fail with
 EINVAL.

Reviewed by:	glebius
---
 lib/libc/sys/getpeername.2 | 4 ++++
 lib/libc/sys/getsockname.2 | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'lib')

diff --git a/lib/libc/sys/getpeername.2 b/lib/libc/sys/getpeername.2
index 9037119..255c7a0 100644
--- a/lib/libc/sys/getpeername.2
+++ b/lib/libc/sys/getpeername.2
@@ -67,6 +67,10 @@ The argument
 is not a valid descriptor.
 .It Bq Er ECONNRESET
 The connection has been reset by the peer.
+.It Bq Er EINVAL
+The value of the
+.Fa namelen
+argument is not valid.
.It Bq Er ENOTSOCK The argument .Fa s diff --git a/lib/libc/sys/getsockname.2 b/lib/libc/sys/getsockname.2 index f1537c7..1de0257 100644 --- a/lib/libc/sys/getsockname.2 +++ b/lib/libc/sys/getsockname.2 @@ -66,6 +66,10 @@ The argument is not a valid descriptor. .It Bq Er ECONNRESET The connection has been reset by the peer. +.It Bq Er EINVAL +The value of the +.Fa namelen +argument is not valid. .It Bq Er ENOTSOCK The argument .Fa s -- cgit v1.1 From 04619555a48eeacc9a13b826b57fa7f2932aae7c Mon Sep 17 00:00:00 2001 From: jilles Date: Fri, 30 Nov 2012 23:51:33 +0000 Subject: libc: Allow setting close-on-exec in fopen/freopen/fdopen. This commit adds a new mode option 'e' that must follow any 'b', '+' and/or 'x' options. C11 is clear about the 'x' needing to follow 'b' and/or '+' and that is what we implement; therefore, require a strict position for 'e' as well. For freopen() with a non-NULL path argument and fopen(), the close-on-exec flag is set iff the 'e' mode option is specified. For freopen() with a NULL path argument and fdopen(), the close-on-exec flag is turned on if the 'e' mode option is specified and remains unchanged otherwise. Although the same behaviour for fopen() can be obtained by open(O_CLOEXEC) and fdopen(), this needlessly complicates the calling code. Apart from the ordering requirement, the new option matches glibc. PR: kern/169320 --- lib/libc/stdio/fdopen.c | 6 ++++++ lib/libc/stdio/flags.c | 5 +++++ lib/libc/stdio/fopen.3 | 19 ++++++++++++++++++- lib/libc/stdio/freopen.c | 5 ++++- 4 files changed, 33 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdio/fdopen.c b/lib/libc/stdio/fdopen.c index 26e2cd7..8fc90a4 100644 --- a/lib/libc/stdio/fdopen.c +++ b/lib/libc/stdio/fdopen.c @@ -80,6 +80,12 @@ fdopen(fd, mode) if ((fp = __sfp()) == NULL) return (NULL); + + if ((oflags & O_CLOEXEC) && _fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { + fp->_flags = 0; + return (NULL); + } + fp->_flags = flags; /* * If opened for appending, but underlying descriptor does not have diff --git a/lib/libc/stdio/flags.c b/lib/libc/stdio/flags.c index 0b6b075..e445fed 100644 --- a/lib/libc/stdio/flags.c +++ b/lib/libc/stdio/flags.c @@ -97,6 +97,7 @@ __sflags(mode, optr) /* 'x' means exclusive (fail if the file exists) */ if (*mode == 'x') { + mode++; if (m == O_RDONLY) { errno = EINVAL; return (0); @@ -104,6 +105,10 @@ __sflags(mode, optr) o |= O_EXCL; } + /* set close-on-exec */ + if (*mode == 'e') + o |= O_CLOEXEC; + *optr = m | o; return (ret); } diff --git a/lib/libc/stdio/fopen.3 b/lib/libc/stdio/fopen.3 index 08438e7..da89ed4 100644 --- a/lib/libc/stdio/fopen.3 +++ b/lib/libc/stdio/fopen.3 @@ -32,7 +32,7 @@ .\" @(#)fopen.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd October 17, 2011 +.Dd November 30, 2012 .Dt FOPEN 3 .Os .Sh NAME @@ -97,6 +97,14 @@ or causes the .Fn fopen call to fail if the file already exists. +An optional +.Dq Li e +following the above +causes the +.Fn fopen +call to set the +.Dv FD_CLOEXEC +flag on the underlying file descriptor. .Pp The .Fa mode @@ -144,6 +152,11 @@ of the stream must be compatible with the mode of the file descriptor. The .Dq Li x mode option is ignored. +If the +.Dq Li e +mode option is present, the +.Dv FD_CLOEXEC +flag is set, otherwise it remains unchanged. When the stream is closed via .Xr fclose 3 , .Fa fildes @@ -277,3 +290,7 @@ The function conforms to .St -p1003.1-88 . +The +.Dq Li e +mode option does not conform to any standard +but is also supported by glibc. 
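
For illustration only (not part of the patch; the path and error handling
are assumptions), a minimal sketch of how a caller would use the new 'e'
mode option described above to keep a stream's descriptor from being
inherited across exec():

#include <stdio.h>

int
main(void)
{
	FILE *fp;

	/*
	 * "ae" opens for appending and sets FD_CLOEXEC on the
	 * underlying descriptor, so processes created with exec()
	 * do not inherit it.  The path is hypothetical.
	 */
	fp = fopen("/tmp/example.log", "ae");
	if (fp == NULL)
		return (1);
	fclose(fp);
	return (0);
}
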
diff --git a/lib/libc/stdio/freopen.c b/lib/libc/stdio/freopen.c
index be7bc8a..8d1ec91 100644
--- a/lib/libc/stdio/freopen.c
+++ b/lib/libc/stdio/freopen.c
@@ -118,6 +118,8 @@ freopen(file, mode, fp)
 		(void) ftruncate(fp->_file, (off_t)0);
 		if (!(oflags & O_APPEND))
 			(void) _sseek(fp, (fpos_t)0, SEEK_SET);
+		if (oflags & O_CLOEXEC)
+			(void) _fcntl(fp->_file, F_SETFD, FD_CLOEXEC);
 		f = fp->_file;
 		isopen = 0;
 		wantfd = -1;
@@ -194,7 +196,8 @@ finish:
 	 * assume stderr is always fd STDERR_FILENO, even if being freopen'd.
 	 */
 	if (wantfd >= 0) {
-		if (_dup2(f, wantfd) >= 0) {
+		if ((oflags & O_CLOEXEC ? _fcntl(f, F_DUP2FD_CLOEXEC, wantfd) :
+		    _dup2(f, wantfd)) >= 0) {
 			(void)_close(f);
 			f = wantfd;
 		} else
--
cgit v1.1


From 242dabb510e96151dfa1c14d7e2ffd1db8fc882a Mon Sep 17 00:00:00 2001
From: eadler
Date: Sat, 1 Dec 2012 15:25:41 +0000
Subject: The getline function returns the number of characters read, not
 written. Use clearer text for this.

PR:		docs/174023
Submitted by:	Paul Procacci
Approved by:	bcr (mentor)
MFC after:	1 week
---
 lib/libc/stdio/getline.3 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/stdio/getline.3 b/lib/libc/stdio/getline.3
index 6061bfc..2161999 100644
--- a/lib/libc/stdio/getline.3
+++ b/lib/libc/stdio/getline.3
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 30, 2010
+.Dd November 30, 2012
 .Dt GETLINE 3
 .Os
 .Sh NAME
@@ -76,7 +76,7 @@ The
 .Fn getdelim
 and
 .Fn getline
-functions return the number of characters written, excluding the
+functions return the number of characters stored in the buffer, excluding the
 terminating
 .Dv NUL
 character.
--
cgit v1.1


From 5192abbac636f4d423299ad2010b603072124621 Mon Sep 17 00:00:00 2001
From: marcel
Date: Sat, 1 Dec 2012 17:44:06 +0000
Subject: In globextend() when the pathv vector cannot be (re-)allocated,
 don't free and clear the gl_pathv pointer in the glob_t structure.

Doing so breaks the invariant of the glob_t structure, as stated in the
comment right in front of the globextend() function. If gl_pathv was
non-NULL, then gl_pathc was > 0. Making gl_pathv a NULL pointer without
also setting gl_pathc to 0 is wrong.

Since we otherwise don't free the memory associated with a glob_t in
error cases, it's unlikely that this change will cause a memory leak
that wasn't already there to begin with. Callers of glob(3) must call
globfree(3) irrespective of whether glob(3) returned an error or not.
---
 lib/libc/gen/glob.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c
index 211b535..07eeaa8 100644
--- a/lib/libc/gen/glob.c
+++ b/lib/libc/gen/glob.c
@@ -718,13 +718,8 @@ globextend(const Char *path, glob_t *pglob, size_t *limit)
 	pathv = pglob->gl_pathv ?
 	    realloc((char *)pglob->gl_pathv, newsize) :
 	    malloc(newsize);
-	if (pathv == NULL) {
-		if (pglob->gl_pathv) {
-			free(pglob->gl_pathv);
-			pglob->gl_pathv = NULL;
-		}
+	if (pathv == NULL)
 		return (GLOB_NOSPACE);
-	}
 
 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
 		/* first time around -- clear initial gl_offs items */
--
cgit v1.1


From e0cbca43aafc47c5c47db73820ede29110da0d0d Mon Sep 17 00:00:00 2001
From: marcel
Date: Sat, 1 Dec 2012 17:50:39 +0000
Subject: In globextend(), take advantage of the fact that realloc(NULL, size)
 is equivalent to malloc(size).

This eliminates the conditional expression used for calling either
realloc() or malloc() when realloc() will do all the time.
---
 lib/libc/gen/glob.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c
index 07eeaa8..3c5020c 100644
--- a/lib/libc/gen/glob.c
+++ b/lib/libc/gen/glob.c
@@ -715,9 +715,8 @@ globextend(const Char *path, glob_t *pglob, size_t *limit)
 	}
 
 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
-	pathv = pglob->gl_pathv ?
-	    realloc((char *)pglob->gl_pathv, newsize) :
-	    malloc(newsize);
+	/* realloc(NULL, newsize) is equivalent to malloc(newsize). */
+	pathv = realloc((void *)pglob->gl_pathv, newsize);
 	if (pathv == NULL)
 		return (GLOB_NOSPACE);
 
--
cgit v1.1


From aee35ecd72729f8d859eba45444833f5bb23d581 Mon Sep 17 00:00:00 2001
From: marcel
Date: Sat, 1 Dec 2012 21:26:46 +0000
Subject: Protect against DoS attacks, such as being described in
 CVE-2010-2632.

The changes were derived from what has been committed to NetBSD, with
modifications. These are:

1. Preserve the existing GLOB_LIMIT behaviour by including the number
   of matches in the set of parameters to limit.

2. Change some of the limits to avoid impacting normal use cases:

	GLOB_LIMIT_STRING - change from 65536 to ARG_MAX so that
	glob(3) can still provide a full command line of expanded
	names.

	GLOB_LIMIT_STAT - change from 128 to 1024 for no other reason
	than that 128 feels too low (it's not a limit that impacts the
	behaviour of the test program listed in CVE-2010-2632).

	GLOB_LIMIT_PATH - change from 1024 to 65536 so that glob(3)
	can still provide a full command line of expanded names.

3. Protect against buffer overruns when we hit the GLOB_LIMIT_STAT or
   GLOB_LIMIT_READDIR limits. We append SEP and EOS to pathend in
   those cases. Return GLOB_ABORTED instead of GLOB_NOSPACE when we
   would otherwise overrun the buffer.

This change also modifies the existing behaviour of glob(3) in case
GLOB_LIMIT is specified by limiting the *new* matches and not all
matches. This is an important distinction when GLOB_APPEND is set or
when the caller uses a non-zero gl_offs. Previously pre-existing
matches or the value of gl_offs would be counted in the number of
matches even though the man page states that glob(3) would return
GLOB_NOSPACE when gl_matchc or more matches were found.

The limits that cannot be circumvented are GLOB_LIMIT_STRING and
GLOB_LIMIT_PATH; all others can be crossed by simply calling glob(3)
again with GLOB_APPEND set. The entire description above applies
only when GLOB_LIMIT has been specified, of course. No limits apply
when this flag isn't set!

Obtained from:	Juniper Networks, Inc
---
 lib/libc/gen/glob.c | 101 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 79 insertions(+), 22 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c
index 3c5020c..832dc8d 100644
--- a/lib/libc/gen/glob.c
+++ b/lib/libc/gen/glob.c
@@ -94,6 +94,25 @@ __FBSDID("$FreeBSD$");
 
 #include "collate.h"
 
+/*
+ * glob(3) expansion limits. Stop the expansion if any of these limits
+ * is reached. This caps the runtime in the face of DoS attacks. 
See + * also CVE-2010-2632 + */ +#define GLOB_LIMIT_BRACE 128 /* number of brace calls */ +#define GLOB_LIMIT_PATH 65536 /* number of path elements */ +#define GLOB_LIMIT_READDIR 16384 /* number of readdirs */ +#define GLOB_LIMIT_STAT 1024 /* number of stat system calls */ +#define GLOB_LIMIT_STRING ARG_MAX /* maximum total size for paths */ + +struct glob_limit { + size_t l_brace_cnt; + size_t l_path_lim; + size_t l_readdir_cnt; + size_t l_stat_cnt; + size_t l_string_cnt; +}; + #define DOLLAR '$' #define DOT '.' #define EOS '\0' @@ -153,15 +172,18 @@ static const Char *g_strchr(const Char *, wchar_t); static Char *g_strcat(Char *, const Char *); #endif static int g_stat(Char *, struct stat *, glob_t *); -static int glob0(const Char *, glob_t *, size_t *); -static int glob1(Char *, glob_t *, size_t *); -static int glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *); -static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *); -static int globextend(const Char *, glob_t *, size_t *); -static const Char * +static int glob0(const Char *, glob_t *, struct glob_limit *); +static int glob1(Char *, glob_t *, struct glob_limit *); +static int glob2(Char *, Char *, Char *, Char *, glob_t *, + struct glob_limit *); +static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, + struct glob_limit *); +static int globextend(const Char *, glob_t *, struct glob_limit *); +static const Char * globtilde(const Char *, Char *, size_t, glob_t *); -static int globexp1(const Char *, glob_t *, size_t *); -static int globexp2(const Char *, const Char *, glob_t *, int *, size_t *); +static int globexp1(const Char *, glob_t *, struct glob_limit *); +static int globexp2(const Char *, const Char *, glob_t *, int *, + struct glob_limit *); static int match(Char *, Char *, Char *); #ifdef DEBUG static void qprintf(const char *, Char *); @@ -171,8 +193,8 @@ int glob(const char * __restrict pattern, int flags, int (*errfunc)(const char *, int), glob_t * __restrict pglob) { + struct glob_limit limit = { 0, 0, 0, 0, 0 }; const char *patnext; - size_t limit; Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; mbstate_t mbs; wchar_t wc; @@ -186,11 +208,10 @@ glob(const char * __restrict pattern, int flags, pglob->gl_offs = 0; } if (flags & GLOB_LIMIT) { - limit = pglob->gl_matchc; - if (limit == 0) - limit = ARG_MAX; - } else - limit = 0; + limit.l_path_lim = pglob->gl_matchc; + if (limit.l_path_lim == 0) + limit.l_path_lim = GLOB_LIMIT_PATH; + } pglob->gl_flags = flags & ~GLOB_MAGCHAR; pglob->gl_errfunc = errfunc; pglob->gl_matchc = 0; @@ -243,11 +264,17 @@ glob(const char * __restrict pattern, int flags, * characters */ static int -globexp1(const Char *pattern, glob_t *pglob, size_t *limit) +globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit) { const Char* ptr = pattern; int rv; + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { + errno = 0; + return (GLOB_NOSPACE); + } + /* Protect a single {}, for find(1), like csh */ if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) return glob0(pattern, pglob, limit); @@ -266,7 +293,8 @@ globexp1(const Char *pattern, glob_t *pglob, size_t *limit) * If it fails then it tries to glob the rest of the pattern and returns. 
*/ static int -globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit) +globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, + struct glob_limit *limit) { int i; Char *lm, *ls; @@ -436,7 +464,7 @@ globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) * if things went well, nonzero if errors occurred. */ static int -glob0(const Char *pattern, glob_t *pglob, size_t *limit) +glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit) { const Char *qpatnext; int err; @@ -529,7 +557,7 @@ compare(const void *p, const void *q) } static int -glob1(Char *pattern, glob_t *pglob, size_t *limit) +glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit) { Char pathbuf[MAXPATHLEN]; @@ -547,7 +575,7 @@ glob1(Char *pattern, glob_t *pglob, size_t *limit) */ static int glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, - glob_t *pglob, size_t *limit) + glob_t *pglob, struct glob_limit *limit) { struct stat sb; Char *p, *q; @@ -563,6 +591,15 @@ glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, if (g_lstat(pathbuf, &sb, pglob)) return (0); + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) { + errno = 0; + if (pathend + 1 > pathend_last) + return (GLOB_ABORTED); + *pathend++ = SEP; + *pathend = EOS; + return (GLOB_NOSPACE); + } if (((pglob->gl_flags & GLOB_MARK) && pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) || (S_ISLNK(sb.st_mode) && @@ -606,7 +643,7 @@ glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, static int glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, - glob_t *pglob, size_t *limit) + glob_t *pglob, struct glob_limit *limit) { struct dirent *dp; DIR *dirp; @@ -652,6 +689,19 @@ glob3(Char *pathbuf, Char *pathend, Char *pathend_last, size_t clen; mbstate_t mbs; + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) { + errno = 0; + if (pathend + 1 > pathend_last) + err = GLOB_ABORTED; + else { + *pathend++ = SEP; + *pathend = EOS; + err = GLOB_NOSPACE; + } + break; + } + /* Initial DOT must be matched literally. */ if (dp->d_name[0] == DOT && *pattern != DOT) continue; @@ -702,14 +752,15 @@ glob3(Char *pathbuf, Char *pathend, Char *pathend_last, * gl_pathv points to (gl_offs + gl_pathc + 1) items. 
*/ static int -globextend(const Char *path, glob_t *pglob, size_t *limit) +globextend(const Char *path, glob_t *pglob, struct glob_limit *limit) { char **pathv; size_t i, newsize, len; char *copy; const Char *p; - if (*limit && pglob->gl_pathc > *limit) { + if ((pglob->gl_flags & GLOB_LIMIT) && + pglob->gl_matchc > limit->l_path_lim) { errno = 0; return (GLOB_NOSPACE); } @@ -731,6 +782,12 @@ globextend(const Char *path, glob_t *pglob, size_t *limit) for (p = path; *p++;) continue; len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */ + limit->l_string_cnt += len; + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_string_cnt >= GLOB_LIMIT_STRING) { + errno = 0; + return (GLOB_NOSPACE); + } if ((copy = malloc(len)) != NULL) { if (g_Ctoc(path, copy, len)) { free(copy); -- cgit v1.1 From 6de2c08bc400b4aca9fb46684e8bdb56eed9b09f Mon Sep 17 00:00:00 2001 From: dim Date: Sun, 2 Dec 2012 13:10:19 +0000 Subject: Vendor import of llvm release_32 branch r168974 (effectively, 3.2 RC2): http://llvm.org/svn/llvm-project/llvm/branches/release_32@168974 --- lib/Analysis/AliasAnalysis.cpp | 10 +- lib/Analysis/AliasSetTracker.cpp | 6 +- lib/Analysis/Analysis.cpp | 4 +- lib/Analysis/BasicAliasAnalysis.cpp | 170 +- lib/Analysis/BranchProbabilityInfo.cpp | 134 +- lib/Analysis/CMakeLists.txt | 5 +- lib/Analysis/CaptureTracking.cpp | 4 +- lib/Analysis/CodeMetrics.cpp | 10 +- lib/Analysis/ConstantFolding.cpp | 265 +- lib/Analysis/CostModel.cpp | 193 + lib/Analysis/DependenceAnalysis.cpp | 3786 ++++++++++++++++++++ lib/Analysis/DominanceFrontier.cpp | 2 + lib/Analysis/IPA/CallGraph.cpp | 13 +- lib/Analysis/IPA/GlobalsModRef.cpp | 6 +- lib/Analysis/IVUsers.cpp | 6 +- lib/Analysis/InlineCost.cpp | 95 +- lib/Analysis/InstructionSimplify.cpp | 89 +- lib/Analysis/LazyValueInfo.cpp | 52 +- lib/Analysis/Lint.cpp | 56 +- lib/Analysis/Loads.cpp | 8 +- lib/Analysis/LoopDependenceAnalysis.cpp | 362 -- lib/Analysis/LoopInfo.cpp | 11 +- lib/Analysis/MemoryBuiltins.cpp | 184 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 30 +- lib/Analysis/NoAliasAnalysis.cpp | 4 +- lib/Analysis/PHITransAddr.cpp | 2 + lib/Analysis/ProfileDataLoader.cpp | 155 + lib/Analysis/ProfileDataLoaderPass.cpp | 188 + lib/Analysis/ProfileEstimatorPass.cpp | 2 +- lib/Analysis/ProfileInfo.cpp | 26 - lib/Analysis/RegionInfo.cpp | 26 +- lib/Analysis/RegionPass.cpp | 5 +- lib/Analysis/ScalarEvolution.cpp | 116 +- lib/Analysis/ScalarEvolutionExpander.cpp | 23 +- lib/Analysis/Trace.cpp | 2 + lib/Analysis/ValueTracking.cpp | 61 +- lib/Archive/ArchiveInternals.h | 2 +- lib/Archive/ArchiveReader.cpp | 4 +- lib/AsmParser/LLLexer.cpp | 9 +- lib/AsmParser/LLParser.cpp | 257 +- lib/AsmParser/LLParser.h | 2 +- lib/AsmParser/LLToken.h | 11 +- lib/Bitcode/Reader/BitcodeReader.cpp | 159 +- lib/Bitcode/Reader/BitcodeReader.h | 67 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 130 +- lib/Bitcode/Writer/ValueEnumerator.h | 6 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 4 +- lib/CodeGen/AllocationOrder.cpp | 5 +- lib/CodeGen/Analysis.cpp | 18 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 2 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 171 +- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 4 +- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 234 +- lib/CodeGen/AsmPrinter/DIE.cpp | 18 +- lib/CodeGen/AsmPrinter/DIE.h | 8 - lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 4 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 4 +- lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 67 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 7 +- 
lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 203 +- lib/CodeGen/AsmPrinter/DwarfDebug.h | 27 +- lib/CodeGen/AsmPrinter/DwarfException.cpp | 4 +- lib/CodeGen/AsmPrinter/DwarfException.h | 40 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4 +- lib/CodeGen/AsmPrinter/Win64Exception.cpp | 2 +- lib/CodeGen/BranchFolding.cpp | 25 +- lib/CodeGen/CMakeLists.txt | 3 + lib/CodeGen/CalcSpillWeights.cpp | 6 +- lib/CodeGen/CallingConvLower.cpp | 4 +- lib/CodeGen/CodeGen.cpp | 2 + lib/CodeGen/CodePlacementOpt.cpp | 2 +- lib/CodeGen/CriticalAntiDepBreaker.cpp | 2 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 8 +- lib/CodeGen/EarlyIfConversion.cpp | 10 +- lib/CodeGen/ExecutionDepsFix.cpp | 11 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 4 +- lib/CodeGen/GCStrategy.cpp | 14 +- lib/CodeGen/IfConversion.cpp | 6 +- lib/CodeGen/InlineSpiller.cpp | 6 +- lib/CodeGen/IntrinsicLowering.cpp | 8 +- lib/CodeGen/LLVMTargetMachine.cpp | 6 +- lib/CodeGen/LiveDebugVariables.cpp | 3 +- lib/CodeGen/LiveInterval.cpp | 97 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 831 ++--- lib/CodeGen/LiveIntervalUnion.h | 4 +- lib/CodeGen/LiveRangeCalc.cpp | 6 +- lib/CodeGen/LiveRangeEdit.cpp | 11 +- lib/CodeGen/LiveRegMatrix.cpp | 4 +- lib/CodeGen/LiveRegMatrix.h | 2 +- lib/CodeGen/LiveStackAnalysis.cpp | 5 +- lib/CodeGen/LiveVariables.cpp | 42 +- lib/CodeGen/MachineBasicBlock.cpp | 90 +- lib/CodeGen/MachineBlockPlacement.cpp | 6 +- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 20 +- lib/CodeGen/MachineCSE.cpp | 70 +- lib/CodeGen/MachineCopyPropagation.cpp | 13 +- lib/CodeGen/MachineFunction.cpp | 49 +- lib/CodeGen/MachineFunctionPrinterPass.cpp | 2 +- lib/CodeGen/MachineInstr.cpp | 337 +- lib/CodeGen/MachineInstrBundle.cpp | 62 +- lib/CodeGen/MachineLICM.cpp | 2 +- lib/CodeGen/MachineLoopInfo.cpp | 2 + lib/CodeGen/MachineModuleInfo.cpp | 2 +- lib/CodeGen/MachineModuleInfoImpls.cpp | 4 +- lib/CodeGen/MachinePostDominators.cpp | 55 + lib/CodeGen/MachineRegisterInfo.cpp | 18 +- lib/CodeGen/MachineScheduler.cpp | 1458 +++++--- lib/CodeGen/MachineSink.cpp | 2 - lib/CodeGen/MachineTraceMetrics.cpp | 74 +- lib/CodeGen/MachineTraceMetrics.h | 13 +- lib/CodeGen/MachineVerifier.cpp | 157 +- lib/CodeGen/Passes.cpp | 17 +- lib/CodeGen/PeepholeOptimizer.cpp | 5 + lib/CodeGen/PostRASchedulerList.cpp | 9 +- lib/CodeGen/ProcessImplicitDefs.cpp | 3 +- lib/CodeGen/PrologEpilogInserter.cpp | 8 +- lib/CodeGen/RegAllocBasic.cpp | 3 +- lib/CodeGen/RegAllocFast.cpp | 73 +- lib/CodeGen/RegAllocGreedy.cpp | 8 +- lib/CodeGen/RegAllocPBQP.cpp | 17 +- lib/CodeGen/RegisterClassInfo.cpp | 10 +- lib/CodeGen/RegisterCoalescer.cpp | 1077 ++++-- lib/CodeGen/RegisterCoalescer.h | 7 + lib/CodeGen/RegisterPressure.cpp | 35 +- lib/CodeGen/RegisterScavenging.cpp | 7 +- lib/CodeGen/ScheduleDAG.cpp | 2 + lib/CodeGen/ScheduleDAGInstrs.cpp | 346 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 3 +- lib/CodeGen/ScoreboardHazardRecognizer.cpp | 2 + lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1190 +++++- lib/CodeGen/SelectionDAG/FastISel.cpp | 4 +- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 8 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 55 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 147 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 50 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 43 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 48 +- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 73 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 27 +- 
lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 4 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 182 +- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 44 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 18 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 9 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 2 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 284 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 372 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 19 +- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 11 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 31 +- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 7 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 43 +- .../SelectionDAG/TargetSelectionDAGInfo.cpp | 2 +- lib/CodeGen/ShrinkWrapping.cpp | 22 +- lib/CodeGen/SjLjEHPrepare.cpp | 86 +- lib/CodeGen/SlotIndexes.cpp | 4 + lib/CodeGen/SplitKit.cpp | 2 + lib/CodeGen/StackColoring.cpp | 783 ++++ lib/CodeGen/StackProtector.cpp | 69 +- lib/CodeGen/StackSlotColoring.cpp | 6 +- lib/CodeGen/StrongPHIElimination.cpp | 11 +- lib/CodeGen/TailDuplication.cpp | 3 +- lib/CodeGen/TargetInstrInfoImpl.cpp | 120 +- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 12 +- lib/CodeGen/TargetSchedule.cpp | 306 ++ lib/CodeGen/TwoAddressInstructionPass.cpp | 499 +-- lib/CodeGen/VirtRegMap.cpp | 16 +- lib/CodeGen/VirtRegMap.h | 4 +- lib/DebugInfo/CMakeLists.txt | 1 + lib/DebugInfo/DIContext.cpp | 7 +- lib/DebugInfo/DWARFCompileUnit.cpp | 48 +- lib/DebugInfo/DWARFCompileUnit.h | 16 +- lib/DebugInfo/DWARFContext.cpp | 220 +- lib/DebugInfo/DWARFContext.h | 40 +- lib/DebugInfo/DWARFDebugAranges.cpp | 1 - lib/DebugInfo/DWARFDebugInfoEntry.cpp | 184 +- lib/DebugInfo/DWARFDebugInfoEntry.h | 54 +- lib/DebugInfo/DWARFDebugLine.cpp | 27 + lib/DebugInfo/DWARFDebugLine.h | 8 + lib/DebugInfo/DWARFDebugRangeList.cpp | 67 + lib/DebugInfo/DWARFDebugRangeList.h | 78 + lib/DebugInfo/DWARFFormValue.cpp | 76 +- lib/DebugInfo/DWARFFormValue.h | 2 +- lib/ExecutionEngine/ExecutionEngine.cpp | 43 +- lib/ExecutionEngine/ExecutionEngineBindings.cpp | 2 +- lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt | 9 +- .../IntelJITEvents/IntelJITEventListener.cpp | 32 +- .../IntelJITEvents/IntelJITEventsWrapper.h | 102 + lib/ExecutionEngine/IntelJITEvents/Makefile | 5 +- .../IntelJITEvents/ittnotify_config.h | 454 +++ .../IntelJITEvents/ittnotify_types.h | 70 + lib/ExecutionEngine/IntelJITEvents/jitprofiling.c | 481 +++ lib/ExecutionEngine/IntelJITEvents/jitprofiling.h | 259 ++ .../Interpreter/ExternalFunctions.cpp | 8 +- lib/ExecutionEngine/Interpreter/Interpreter.cpp | 2 +- lib/ExecutionEngine/Interpreter/Interpreter.h | 4 +- lib/ExecutionEngine/JIT/JIT.cpp | 14 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 4 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 4 +- lib/ExecutionEngine/JIT/JITEmitter.cpp | 36 +- lib/ExecutionEngine/MCJIT/CMakeLists.txt | 1 - lib/ExecutionEngine/MCJIT/MCJIT.cpp | 113 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 26 +- lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp | 14 - lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h | 50 - lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp | 8 +- lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h | 6 +- lib/ExecutionEngine/RuntimeDyld/ObjectImage.h | 59 - .../RuntimeDyld/ObjectImageCommon.h | 76 + lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 145 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 555 ++- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 56 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 96 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 28 
+- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 6 +- lib/ExecutionEngine/TargetSelect.cpp | 9 +- lib/MC/ELFObjectWriter.cpp | 28 +- lib/MC/MCAsmBackend.cpp | 7 +- lib/MC/MCAsmInfo.cpp | 2 +- lib/MC/MCAsmInfoCOFF.cpp | 4 +- lib/MC/MCAsmInfoDarwin.cpp | 1 + lib/MC/MCAsmStreamer.cpp | 27 +- lib/MC/MCAssembler.cpp | 15 +- lib/MC/MCContext.cpp | 6 + lib/MC/MCDisassembler/Disassembler.cpp | 14 + lib/MC/MCDisassembler/EDDisassembler.cpp | 3 +- lib/MC/MCDwarf.cpp | 2 + lib/MC/MCELFObjectTargetWriter.cpp | 8 + lib/MC/MCELFStreamer.cpp | 52 +- lib/MC/MCExpr.cpp | 11 +- lib/MC/MCInst.cpp | 4 + lib/MC/MCInstPrinter.cpp | 14 + lib/MC/MCLabel.cpp | 2 + lib/MC/MCMachOStreamer.cpp | 59 +- lib/MC/MCObjectFileInfo.cpp | 33 +- lib/MC/MCObjectStreamer.cpp | 45 +- lib/MC/MCParser/AsmLexer.cpp | 13 +- lib/MC/MCParser/AsmParser.cpp | 578 ++- lib/MC/MCParser/ELFAsmParser.cpp | 2 +- lib/MC/MCParser/MCAsmLexer.cpp | 3 +- lib/MC/MCParser/MCAsmParser.cpp | 2 + lib/MC/MCParser/MCTargetAsmParser.cpp | 2 +- lib/MC/MCRegisterInfo.cpp | 3 + lib/MC/MCStreamer.cpp | 4 + lib/MC/MCSubtargetInfo.cpp | 57 +- lib/MC/MCSymbol.cpp | 4 +- lib/MC/MCValue.cpp | 2 + lib/MC/MachObjectWriter.cpp | 53 +- lib/MC/SubtargetFeature.cpp | 35 +- lib/MC/WinCOFFStreamer.cpp | 42 - lib/Object/COFFObjectFile.cpp | 14 +- lib/Object/MachOObjectFile.cpp | 21 +- lib/Support/APFloat.cpp | 230 +- lib/Support/Atomic.cpp | 14 +- lib/Support/CMakeLists.txt | 6 - lib/Support/CommandLine.cpp | 26 +- lib/Support/DAGDeltaAlgorithm.cpp | 10 +- lib/Support/DataExtractor.cpp | 6 +- lib/Support/DataStream.cpp | 2 +- lib/Support/DynamicLibrary.cpp | 2 +- lib/Support/Errno.cpp | 15 +- lib/Support/FoldingSet.cpp | 18 + lib/Support/Host.cpp | 4 + lib/Support/LockFileManager.cpp | 2 +- lib/Support/Makefile | 3 - lib/Support/Memory.cpp | 56 - lib/Support/MemoryBuffer.cpp | 58 +- lib/Support/SmallVector.cpp | 6 +- lib/Support/StreamableMemoryObject.cpp | 24 +- lib/Support/StringMap.cpp | 9 +- lib/Support/StringRef.cpp | 4 +- lib/Support/Triple.cpp | 59 +- lib/Support/Unix/Memory.inc | 206 +- lib/Support/Unix/Path.inc | 17 +- lib/Support/Unix/Signals.inc | 38 +- lib/Support/Windows/Memory.inc | 165 +- lib/Support/Windows/PathV2.inc | 2 +- lib/Support/YAMLParser.cpp | 7 + lib/Support/raw_ostream.cpp | 12 +- lib/Support/regexec.c | 2 +- lib/Support/system_error.cpp | 10 +- lib/TableGen/CMakeLists.txt | 5 - lib/TableGen/Error.cpp | 35 +- lib/TableGen/Main.cpp | 138 +- lib/TableGen/Makefile | 4 - lib/TableGen/Record.cpp | 615 ++-- lib/TableGen/TGParser.cpp | 94 +- lib/TableGen/TGParser.h | 23 +- lib/TableGen/TableGenAction.cpp | 15 - lib/Target/ARM/ARM.h | 1 + lib/Target/ARM/ARM.td | 31 +- lib/Target/ARM/ARMAsmPrinter.cpp | 98 +- lib/Target/ARM/ARMAsmPrinter.h | 40 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 943 ++++- lib/Target/ARM/ARMBaseInstrInfo.h | 16 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 173 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 13 +- lib/Target/ARM/ARMCallingConv.td | 2 + lib/Target/ARM/ARMCodeEmitter.cpp | 8 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 9 +- lib/Target/ARM/ARMConstantPoolValue.h | 5 - lib/Target/ARM/ARMELFWriterInfo.cpp | 78 - lib/Target/ARM/ARMELFWriterInfo.h | 59 - lib/Target/ARM/ARMExpandPseudoInsts.cpp | 16 +- lib/Target/ARM/ARMFastISel.cpp | 233 +- lib/Target/ARM/ARMFrameLowering.cpp | 41 +- lib/Target/ARM/ARMHazardRecognizer.cpp | 2 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 174 +- lib/Target/ARM/ARMISelLowering.cpp | 679 +++- lib/Target/ARM/ARMISelLowering.h | 19 +- lib/Target/ARM/ARMInstrFormats.td | 21 + 
lib/Target/ARM/ARMInstrInfo.cpp | 62 + lib/Target/ARM/ARMInstrInfo.td | 212 +- lib/Target/ARM/ARMInstrNEON.td | 125 +- lib/Target/ARM/ARMInstrThumb.td | 6 +- lib/Target/ARM/ARMInstrThumb2.td | 138 +- lib/Target/ARM/ARMInstrVFP.td | 12 +- lib/Target/ARM/ARMJITInfo.cpp | 2 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 12 +- lib/Target/ARM/ARMRegisterInfo.td | 23 +- lib/Target/ARM/ARMSchedule.td | 2 + lib/Target/ARM/ARMScheduleA9.td | 597 ++- lib/Target/ARM/ARMScheduleSwift.td | 1085 ++++++ lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2 +- lib/Target/ARM/ARMSubtarget.cpp | 9 +- lib/Target/ARM/ARMSubtarget.h | 14 +- lib/Target/ARM/ARMTargetMachine.cpp | 19 +- lib/Target/ARM/ARMTargetMachine.h | 35 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 280 +- lib/Target/ARM/CMakeLists.txt | 1 - lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 92 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 584 +-- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 15 +- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 7 + lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1 - lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 10 +- lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 5 +- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 8 + lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 2 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 51 +- lib/Target/ARM/MLxExpansionPass.cpp | 74 +- lib/Target/CMakeLists.txt | 3 +- lib/Target/CellSPU/SPUAsmPrinter.cpp | 6 +- lib/Target/CellSPU/SPUFrameLowering.cpp | 2 +- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 43 +- lib/Target/CellSPU/SPUSubtarget.h | 4 +- lib/Target/CellSPU/SPUTargetMachine.cpp | 5 +- lib/Target/CellSPU/SPUTargetMachine.h | 17 +- lib/Target/CppBackend/CPPBackend.cpp | 35 +- lib/Target/CppBackend/CPPTargetMachine.h | 4 +- lib/Target/Hexagon/CMakeLists.txt | 1 + lib/Target/Hexagon/HexagonAsmPrinter.cpp | 2 +- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 2 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 2 + lib/Target/Hexagon/HexagonInstrFormats.td | 10 + lib/Target/Hexagon/HexagonInstrInfo.cpp | 28 +- lib/Target/Hexagon/HexagonInstrInfo.td | 312 +- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 681 ++++ lib/Target/Hexagon/HexagonMachineScheduler.h | 244 ++ lib/Target/Hexagon/HexagonNewValueJump.cpp | 2 +- lib/Target/Hexagon/HexagonPeephole.cpp | 35 + lib/Target/Hexagon/HexagonRegisterInfo.cpp | 52 + lib/Target/Hexagon/HexagonRegisterInfo.h | 5 + lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 2 +- lib/Target/Hexagon/HexagonSchedule.td | 1 + lib/Target/Hexagon/HexagonScheduleV4.td | 1 + lib/Target/Hexagon/HexagonSubtarget.cpp | 33 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 28 +- lib/Target/Hexagon/HexagonTargetMachine.h | 17 +- lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 4 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 4 +- .../Hexagon/HexagonVarargsCallingConvention.h | 8 +- .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 2 +- lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 29 +- lib/Target/MBlaze/CMakeLists.txt | 1 - lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 2 +- lib/Target/MBlaze/MBlazeELFWriterInfo.cpp | 107 - lib/Target/MBlaze/MBlazeELFWriterInfo.h | 59 - lib/Target/MBlaze/MBlazeFrameLowering.cpp | 2 +- lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 5 +- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2 +- lib/Target/MBlaze/MBlazeTargetMachine.cpp | 7 +- lib/Target/MBlaze/MBlazeTargetMachine.h | 20 +- lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 4 +- 
.../MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp | 3 +- .../MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp | 4 +- .../MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h | 3 +- lib/Target/MSP430/MSP430FrameLowering.cpp | 16 +- lib/Target/MSP430/MSP430FrameLowering.h | 1 + lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 4 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 4 +- lib/Target/MSP430/MSP430ISelLowering.h | 2 +- lib/Target/MSP430/MSP430RegisterInfo.cpp | 14 - lib/Target/MSP430/MSP430RegisterInfo.h | 2 - lib/Target/MSP430/MSP430TargetMachine.cpp | 6 +- lib/Target/MSP430/MSP430TargetMachine.h | 16 +- lib/Target/Mangler.cpp | 9 +- lib/Target/Mips/AsmParser/CMakeLists.txt | 1 + lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 1289 ++++++- lib/Target/Mips/CMakeLists.txt | 2 + lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 29 + lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 1 + lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 16 +- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 6 +- .../Mips/MCTargetDesc/MipsDirectObjLower.cpp | 81 + lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h | 28 + .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 10 +- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 60 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 12 +- lib/Target/Mips/Makefile | 4 +- lib/Target/Mips/Mips.td | 19 +- lib/Target/Mips/Mips16FrameLowering.cpp | 56 +- lib/Target/Mips/Mips16FrameLowering.h | 5 + lib/Target/Mips/Mips16InstrInfo.cpp | 93 +- lib/Target/Mips/Mips16InstrInfo.h | 4 + lib/Target/Mips/Mips16InstrInfo.td | 1225 ++++++- lib/Target/Mips/Mips16RegisterInfo.cpp | 122 +- lib/Target/Mips/Mips16RegisterInfo.h | 5 +- lib/Target/Mips/Mips64InstrInfo.td | 95 +- lib/Target/Mips/MipsAnalyzeImmediate.cpp | 2 +- lib/Target/Mips/MipsAsmPrinter.cpp | 42 +- lib/Target/Mips/MipsAsmPrinter.h | 8 + lib/Target/Mips/MipsCallingConv.td | 12 - lib/Target/Mips/MipsCodeEmitter.cpp | 46 +- lib/Target/Mips/MipsDSPInstrFormats.td | 309 ++ lib/Target/Mips/MipsDSPInstrInfo.td | 1319 +++++++ lib/Target/Mips/MipsDelaySlotFiller.cpp | 11 +- lib/Target/Mips/MipsFrameLowering.cpp | 36 +- lib/Target/Mips/MipsFrameLowering.h | 3 + lib/Target/Mips/MipsISelDAGToDAG.cpp | 210 +- lib/Target/Mips/MipsISelLowering.cpp | 1204 ++++--- lib/Target/Mips/MipsISelLowering.h | 164 + lib/Target/Mips/MipsInstrFPU.td | 41 +- lib/Target/Mips/MipsInstrFormats.td | 29 + lib/Target/Mips/MipsInstrInfo.cpp | 55 +- lib/Target/Mips/MipsInstrInfo.h | 12 - lib/Target/Mips/MipsInstrInfo.td | 208 +- lib/Target/Mips/MipsLongBranch.cpp | 232 +- lib/Target/Mips/MipsMCInstLower.cpp | 29 - lib/Target/Mips/MipsMCInstLower.h | 3 +- lib/Target/Mips/MipsMachineFunction.cpp | 13 + lib/Target/Mips/MipsMachineFunction.h | 50 +- lib/Target/Mips/MipsRegisterInfo.cpp | 50 +- lib/Target/Mips/MipsRegisterInfo.h | 4 +- lib/Target/Mips/MipsRegisterInfo.td | 27 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 16 +- lib/Target/Mips/MipsSEInstrInfo.cpp | 51 +- lib/Target/Mips/MipsSEInstrInfo.h | 8 +- lib/Target/Mips/MipsSERegisterInfo.cpp | 45 +- lib/Target/Mips/MipsSERegisterInfo.h | 9 +- lib/Target/Mips/MipsSubtarget.cpp | 9 +- lib/Target/Mips/MipsSubtarget.h | 11 +- lib/Target/Mips/MipsTargetMachine.cpp | 7 +- lib/Target/Mips/MipsTargetMachine.h | 18 +- lib/Target/Mips/MipsTargetObjectFile.cpp | 10 +- lib/Target/NVPTX/NVPTX.td | 34 +- lib/Target/NVPTX/NVPTXAllocaHoisting.h | 4 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 105 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 68 +- lib/Target/NVPTX/NVPTXISelLowering.h | 3 + lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 4 +- 
 lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4 +-
 lib/Target/NVPTX/NVPTXSubtarget.cpp | 20 +-
 lib/Target/NVPTX/NVPTXSubtarget.h | 12 +-
 lib/Target/NVPTX/NVPTXTargetMachine.cpp | 7 +-
 lib/Target/NVPTX/NVPTXTargetMachine.h | 16 +-
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 8 +-
 lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 15 +-
 .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 92 +-
 lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 10 +
 lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4 +-
 .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 56 +-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 2 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 2 +-
 lib/Target/PowerPC/PPC.td | 10 +
 lib/Target/PowerPC/PPCAsmPrinter.cpp | 101 +-
 lib/Target/PowerPC/PPCCallingConv.td | 7 +
 lib/Target/PowerPC/PPCFrameLowering.cpp | 266 +-
 lib/Target/PowerPC/PPCFrameLowering.h | 71 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 156 +-
 lib/Target/PowerPC/PPCISelLowering.cpp | 1217 ++++++-
 lib/Target/PowerPC/PPCISelLowering.h | 60 +-
 lib/Target/PowerPC/PPCInstr64Bit.td | 115 +-
 lib/Target/PowerPC/PPCInstrAltivec.td | 32 +
 lib/Target/PowerPC/PPCInstrFormats.td | 41 +-
 lib/Target/PowerPC/PPCInstrInfo.cpp | 24 +-
 lib/Target/PowerPC/PPCInstrInfo.td | 200 +-
 lib/Target/PowerPC/PPCRegisterInfo.cpp | 36 +-
 lib/Target/PowerPC/PPCRegisterInfo.h | 5 +-
 lib/Target/PowerPC/PPCSchedule.td | 88 +-
 lib/Target/PowerPC/PPCSchedule440.td | 60 +-
 lib/Target/PowerPC/PPCScheduleA2.td | 81 +-
 lib/Target/PowerPC/PPCScheduleE500mc.td | 265 ++
 lib/Target/PowerPC/PPCScheduleE5500.td | 309 ++
 lib/Target/PowerPC/PPCScheduleG3.td | 7 +-
 lib/Target/PowerPC/PPCScheduleG4.td | 7 +-
 lib/Target/PowerPC/PPCScheduleG4Plus.td | 8 +-
 lib/Target/PowerPC/PPCScheduleG5.td | 10 +-
 lib/Target/PowerPC/PPCSubtarget.cpp | 15 +-
 lib/Target/PowerPC/PPCSubtarget.h | 57 +-
 lib/Target/PowerPC/PPCTargetMachine.cpp | 5 +-
 lib/Target/PowerPC/PPCTargetMachine.h | 15 +-
 lib/Target/README.txt | 7 +-
 lib/Target/Sparc/SparcFrameLowering.cpp | 2 +-
 lib/Target/Sparc/SparcISelLowering.cpp | 2 +-
 lib/Target/Sparc/SparcInstrInfo.td | 2 +-
 lib/Target/Sparc/SparcTargetMachine.cpp | 4 +-
 lib/Target/Sparc/SparcTargetMachine.h | 15 +-
 lib/Target/Target.cpp | 19 +-
 lib/Target/TargetData.cpp | 663 ----
 lib/Target/TargetELFWriterInfo.cpp | 25 -
 lib/Target/TargetLibraryInfo.cpp | 75 +-
 lib/Target/TargetLoweringObjectFile.cpp | 4 +-
 lib/Target/TargetMachineC.cpp | 12 +-
 lib/Target/TargetRegisterInfo.cpp | 6 +-
 lib/Target/TargetTransformImpl.cpp | 353 ++
 lib/Target/X86/AsmParser/X86AsmLexer.cpp | 43 +-
 lib/Target/X86/AsmParser/X86AsmParser.cpp | 445 ++-
 lib/Target/X86/CMakeLists.txt | 1 -
 lib/Target/X86/Disassembler/X86Disassembler.cpp | 12 +-
 lib/Target/X86/Disassembler/X86Disassembler.h | 2 +-
 .../X86/Disassembler/X86DisassemblerDecoder.c | 16 +-
 .../X86/Disassembler/X86DisassemblerDecoder.h | 12 +-
 .../Disassembler/X86DisassemblerDecoderCommon.h | 5 +
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 66 +-
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 3 +-
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 48 +-
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 5 +-
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 70 +-
 lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 16 +-
 lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 25 +-
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 5 +
 lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 37 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 115 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 11 +-
 .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 48 +-
 lib/Target/X86/README-SSE.txt | 12 +
 lib/Target/X86/X86.td | 14 +-
 lib/Target/X86/X86AsmPrinter.cpp | 113 +-
 lib/Target/X86/X86AsmPrinter.h | 37 +-
 lib/Target/X86/X86COFFMachineModuleInfo.h | 2 +-
 lib/Target/X86/X86CallingConv.td | 59 +-
 lib/Target/X86/X86CodeEmitter.cpp | 61 +-
 lib/Target/X86/X86ELFWriterInfo.cpp | 147 -
 lib/Target/X86/X86ELFWriterInfo.h | 59 -
 lib/Target/X86/X86FastISel.cpp | 56 +-
 lib/Target/X86/X86FloatingPoint.cpp | 6 +-
 lib/Target/X86/X86FrameLowering.cpp | 13 +-
 lib/Target/X86/X86ISelDAGToDAG.cpp | 609 ++--
 lib/Target/X86/X86ISelLowering.cpp | 3120 +++++++++++-----
 lib/Target/X86/X86ISelLowering.h | 154 +-
 lib/Target/X86/X86InstrCompiler.td | 475 ++-
 lib/Target/X86/X86InstrControl.td | 7 +-
 lib/Target/X86/X86InstrFMA.td | 432 ++-
 lib/Target/X86/X86InstrFormats.td | 91 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td | 31 +-
 lib/Target/X86/X86InstrInfo.cpp | 536 +--
 lib/Target/X86/X86InstrInfo.h | 3 +
 lib/Target/X86/X86InstrInfo.td | 83 +-
 lib/Target/X86/X86InstrMMX.td | 75 +-
 lib/Target/X86/X86InstrSSE.td | 1488 +++++---
 lib/Target/X86/X86InstrShiftRotate.td | 78 +
 lib/Target/X86/X86InstrTSX.td | 32 +
 lib/Target/X86/X86InstrXOP.td | 17 +-
 lib/Target/X86/X86JITInfo.cpp | 17 +-
 lib/Target/X86/X86MCInstLower.cpp | 54 +-
 lib/Target/X86/X86MCInstLower.h | 52 -
 lib/Target/X86/X86RegisterInfo.cpp | 66 +-
 lib/Target/X86/X86RegisterInfo.h | 9 +-
 lib/Target/X86/X86RegisterInfo.td | 496 +--
 lib/Target/X86/X86SelectionDAGInfo.cpp | 2 +-
 lib/Target/X86/X86Subtarget.cpp | 27 +-
 lib/Target/X86/X86Subtarget.h | 31 +-
 lib/Target/X86/X86TargetMachine.cpp | 19 +-
 lib/Target/X86/X86TargetMachine.h | 32 +-
 lib/Target/X86/X86VZeroUpper.cpp | 12 +-
 lib/Target/XCore/XCoreAsmPrinter.cpp | 4 +-
 lib/Target/XCore/XCoreFrameLowering.cpp | 13 +-
 lib/Target/XCore/XCoreISelLowering.cpp | 16 +-
 lib/Target/XCore/XCoreInstrInfo.td | 4 +-
 lib/Target/XCore/XCoreRegisterInfo.cpp | 2 +-
 lib/Target/XCore/XCoreTargetMachine.cpp | 4 +-
 lib/Target/XCore/XCoreTargetMachine.h | 15 +-
 lib/Transforms/IPO/ArgumentPromotion.cpp | 64 +-
 lib/Transforms/IPO/BarrierNoopPass.cpp | 47 +
 lib/Transforms/IPO/CMakeLists.txt | 1 +
 lib/Transforms/IPO/ConstantMerge.cpp | 8 +-
 lib/Transforms/IPO/DeadArgumentElimination.cpp | 107 +-
 lib/Transforms/IPO/ExtractGV.cpp | 63 +-
 lib/Transforms/IPO/FunctionAttrs.cpp | 34 +-
 lib/Transforms/IPO/GlobalOpt.cpp | 93 +-
 lib/Transforms/IPO/IPO.cpp | 7 +-
 lib/Transforms/IPO/InlineAlways.cpp | 6 +-
 lib/Transforms/IPO/InlineSimple.cpp | 4 +-
 lib/Transforms/IPO/Inliner.cpp | 28 +-
 lib/Transforms/IPO/Internalize.cpp | 38 +-
 lib/Transforms/IPO/MergeFunctions.cpp | 26 +-
 lib/Transforms/IPO/PassManagerBuilder.cpp | 56 +-
 lib/Transforms/IPO/PruneEH.cpp | 10 +-
 lib/Transforms/InstCombine/InstCombine.h | 14 +-
 lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2 +-
 lib/Transforms/InstCombine/InstCombineCalls.cpp | 172 +-
 lib/Transforms/InstCombine/InstCombineCasts.cpp | 25 +-
 lib/Transforms/InstCombine/InstCombineCompares.cpp | 70 +-
 .../InstCombine/InstCombineLoadStoreAlloca.cpp | 201 +-
 .../InstCombine/InstCombineMulDivRem.cpp | 17 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp | 2 +-
 lib/Transforms/InstCombine/InstCombineSelect.cpp | 30 +-
 lib/Transforms/InstCombine/InstCombineShifts.cpp | 2 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp | 2 +-
 .../InstCombine/InstCombineVectorOps.cpp | 7 +-
 lib/Transforms/InstCombine/InstCombineWorklist.h | 4 +-
 .../InstCombine/InstructionCombining.cpp | 401 ++-
 .../Instrumentation/AddressSanitizer.cpp | 437 ++-
 lib/Transforms/Instrumentation/BlackList.cpp | 105 +
 lib/Transforms/Instrumentation/BlackList.h | 57 +
 lib/Transforms/Instrumentation/BoundsChecking.cpp | 16 +-
 lib/Transforms/Instrumentation/CMakeLists.txt | 2 +-
 .../Instrumentation/FunctionBlackList.cpp | 79 -
 lib/Transforms/Instrumentation/FunctionBlackList.h | 37 -
 lib/Transforms/Instrumentation/GCOVProfiling.cpp | 73 +-
 .../Instrumentation/MaximumSpanningTree.h | 53 +-
 lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 137 +-
 lib/Transforms/Scalar/CMakeLists.txt | 1 +
 lib/Transforms/Scalar/CodeGenPrepare.cpp | 80 +-
 lib/Transforms/Scalar/ConstantProp.cpp | 4 +-
 .../Scalar/CorrelatedValuePropagation.cpp | 5 +
 lib/Transforms/Scalar/DCE.cpp | 17 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp | 203 +-
 lib/Transforms/Scalar/EarlyCSE.cpp | 120 +-
 lib/Transforms/Scalar/GVN.cpp | 99 +-
 lib/Transforms/Scalar/GlobalMerge.cpp | 10 +-
 lib/Transforms/Scalar/IndVarSimplify.cpp | 42 +-
 lib/Transforms/Scalar/JumpThreading.cpp | 8 +-
 lib/Transforms/Scalar/LICM.cpp | 45 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 37 +-
 lib/Transforms/Scalar/LoopInstSimplify.cpp | 6 +-
 lib/Transforms/Scalar/LoopRotation.cpp | 65 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp | 58 +-
 lib/Transforms/Scalar/LoopUnrollPass.cpp | 9 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp | 13 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 72 +-
 lib/Transforms/Scalar/ObjCARC.cpp | 150 +-
 lib/Transforms/Scalar/Reassociate.cpp | 105 +-
 lib/Transforms/Scalar/SCCP.cpp | 10 +-
 lib/Transforms/Scalar/SROA.cpp | 3697 +++++++++++++++++++
 lib/Transforms/Scalar/Scalar.cpp | 3 +-
 lib/Transforms/Scalar/ScalarReplAggregates.cpp | 198 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp | 62 +-
 lib/Transforms/Scalar/SimplifyLibCalls.cpp | 980 +----
 lib/Transforms/Utils/AddrModeMatcher.cpp | 6 +-
 lib/Transforms/Utils/BasicBlockUtils.cpp | 45 +-
 lib/Transforms/Utils/BuildLibCalls.cpp | 154 +-
 lib/Transforms/Utils/BypassSlowDivision.cpp | 262 ++
 lib/Transforms/Utils/CMakeLists.txt | 4 +
 lib/Transforms/Utils/CloneFunction.cpp | 14 +-
 lib/Transforms/Utils/CodeExtractor.cpp | 2 +-
 lib/Transforms/Utils/InlineFunction.cpp | 4 +-
 lib/Transforms/Utils/IntegerDivision.cpp | 420 +++
 lib/Transforms/Utils/LCSSA.cpp | 15 +-
 lib/Transforms/Utils/Local.cpp | 82 +-
 lib/Transforms/Utils/LoopSimplify.cpp | 7 +
 lib/Transforms/Utils/MetaRenamer.cpp | 132 +
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 18 +-
 lib/Transforms/Utils/SSAUpdater.cpp | 2 +-
 lib/Transforms/Utils/SimplifyCFG.cpp | 1507 ++++++--
 lib/Transforms/Utils/SimplifyIndVar.cpp | 6 +-
 lib/Transforms/Utils/SimplifyInstructions.cpp | 6 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp | 1149 ++++++
 lib/Transforms/Utils/Utils.cpp | 1 +
 lib/Transforms/Utils/ValueMapper.cpp | 2 +-
 lib/Transforms/Vectorize/BBVectorize.cpp | 1012 ++++--
 lib/Transforms/Vectorize/CMakeLists.txt | 1 +
 lib/Transforms/Vectorize/LoopVectorize.cpp | 1941 ++++++++++
 lib/Transforms/Vectorize/Vectorize.cpp | 8 +-
 lib/VMCore/AsmWriter.cpp | 129 +-
 lib/VMCore/Attributes.cpp | 518 ++-
 lib/VMCore/AttributesImpl.h | 71 +
 lib/VMCore/AutoUpgrade.cpp | 3 +-
 lib/VMCore/CMakeLists.txt | 6 +-
 lib/VMCore/ConstantFold.cpp | 28 +-
 lib/VMCore/Constants.cpp | 27 +
 lib/VMCore/ConstantsContext.h | 35 +-
 lib/VMCore/Core.cpp | 62 +-
 lib/VMCore/DIBuilder.cpp | 40 +-
 lib/VMCore/DataLayout.cpp | 749 ++++
 lib/VMCore/DebugInfo.cpp | 10 +
 lib/VMCore/Dominators.cpp | 10 +
 lib/VMCore/Function.cpp | 21 +-
 lib/VMCore/GCOV.cpp | 30 +-
 lib/VMCore/IRBuilder.cpp | 6 +-
 lib/VMCore/InlineAsm.cpp | 13 +-
 lib/VMCore/Instructions.cpp | 63 +-
 lib/VMCore/LLVMContext.cpp | 5 +
 lib/VMCore/LLVMContextImpl.cpp | 18 +-
 lib/VMCore/LLVMContextImpl.h | 9 +-
 lib/VMCore/Makefile | 1 -
 lib/VMCore/PassManager.cpp | 2 +-
 lib/VMCore/TargetTransformInfo.cpp | 31 +
 lib/VMCore/Type.cpp | 41 +-
 lib/VMCore/User.cpp | 9 +
 lib/VMCore/Value.cpp | 2 +-
 lib/VMCore/ValueTypes.cpp | 30 +-
 lib/VMCore/Verifier.cpp | 181 +-
 709 files changed, 53708 insertions(+), 18126 deletions(-)
 create mode 100644 lib/Analysis/CostModel.cpp
 create mode 100644 lib/Analysis/DependenceAnalysis.cpp
 delete mode 100644 lib/Analysis/LoopDependenceAnalysis.cpp
 create mode 100644 lib/Analysis/ProfileDataLoader.cpp
 create mode 100644 lib/Analysis/ProfileDataLoaderPass.cpp
 create mode 100644 lib/CodeGen/MachinePostDominators.cpp
 create mode 100644 lib/CodeGen/StackColoring.cpp
 create mode 100644 lib/CodeGen/TargetSchedule.cpp
 create mode 100644 lib/DebugInfo/DWARFDebugRangeList.cpp
 create mode 100644 lib/DebugInfo/DWARFDebugRangeList.h
 create mode 100644 lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
 create mode 100644 lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
 create mode 100644 lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
 create mode 100644 lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
 create mode 100644 lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
 delete mode 100644 lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp
 delete mode 100644 lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
 delete mode 100644 lib/ExecutionEngine/RuntimeDyld/ObjectImage.h
 create mode 100644 lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
 delete mode 100644 lib/TableGen/TableGenAction.cpp
 delete mode 100644 lib/Target/ARM/ARMELFWriterInfo.cpp
 delete mode 100644 lib/Target/ARM/ARMELFWriterInfo.h
 create mode 100644 lib/Target/ARM/ARMScheduleSwift.td
 create mode 100644 lib/Target/Hexagon/HexagonMachineScheduler.cpp
 create mode 100644 lib/Target/Hexagon/HexagonMachineScheduler.h
 delete mode 100644 lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
 delete mode 100644 lib/Target/MBlaze/MBlazeELFWriterInfo.h
 create mode 100644 lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp
 create mode 100644 lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h
 create mode 100644 lib/Target/Mips/MipsDSPInstrFormats.td
 create mode 100644 lib/Target/Mips/MipsDSPInstrInfo.td
 create mode 100644 lib/Target/PowerPC/PPCScheduleE500mc.td
 create mode 100644 lib/Target/PowerPC/PPCScheduleE5500.td
 delete mode 100644 lib/Target/TargetData.cpp
 delete mode 100644 lib/Target/TargetELFWriterInfo.cpp
 create mode 100644 lib/Target/TargetTransformImpl.cpp
 delete mode 100644 lib/Target/X86/X86ELFWriterInfo.cpp
 delete mode 100644 lib/Target/X86/X86ELFWriterInfo.h
 create mode 100644 lib/Target/X86/X86InstrTSX.td
 delete mode 100644 lib/Target/X86/X86MCInstLower.h
 create mode 100644 lib/Transforms/IPO/BarrierNoopPass.cpp
 create mode 100644 lib/Transforms/Instrumentation/BlackList.cpp
 create mode 100644 lib/Transforms/Instrumentation/BlackList.h
 delete mode 100644 lib/Transforms/Instrumentation/FunctionBlackList.cpp
 delete mode 100644 lib/Transforms/Instrumentation/FunctionBlackList.h
 create mode 100644 lib/Transforms/Scalar/SROA.cpp
 create mode 100644 lib/Transforms/Utils/BypassSlowDivision.cpp
 create mode 100644 lib/Transforms/Utils/IntegerDivision.cpp
 create mode 100644 lib/Transforms/Utils/MetaRenamer.cpp
 create mode 100644 lib/Transforms/Utils/SimplifyLibCalls.cpp
 create mode 100644 lib/Transforms/Vectorize/LoopVectorize.cpp
 create mode 100644 lib/VMCore/AttributesImpl.h
 create mode 100644 lib/VMCore/DataLayout.cpp
 create mode 100644 lib/VMCore/TargetTransformInfo.cpp
(limited to 'lib')

diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 3b6aab1..752edd5 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -35,7 +35,8 @@
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
 // Register the AliasAnalysis interface, providing a nice name to refer to.
@@ -451,7 +452,8 @@ AliasAnalysis::~AliasAnalysis() {}
 /// AliasAnalysis interface before any other methods are called.
 ///
 void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
-  TD = P->getAnalysisIfAvailable<TargetData>();
+  TD = P->getAnalysisIfAvailable<DataLayout>();
+  TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
   AA = &P->getAnalysis<AliasAnalysis>();
 }
 
@@ -461,7 +463,7 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AliasAnalysis>();         // All AA's chain
 }
 
-/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// getTypeStoreSize - Return the DataLayout store size for the given type,
 /// if known, or a conservative value otherwise.
 ///
 uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
@@ -501,7 +503,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
 bool llvm::isNoAliasCall(const Value *V) {
   if (isa<CallInst>(V) || isa<InvokeInst>(V))
     return ImmutableCallSite(cast<Instruction>(V))
-      .paramHasAttr(0, Attribute::NoAlias);
+      .paramHasAttr(0, Attributes::NoAlias);
   return false;
 }
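A note for readers tracking the API change above: with TargetData replaced by DataLayout and TargetLibraryInfo threaded into alias analysis, a client pass of this era wires up the same analyses that InitializeAliasAnalysis now caches. The following is a minimal sketch, not part of the patch; the pass name and the empty body are invented for illustration, only the analysis wiring mirrors the hunks above:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/DataLayout.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    namespace {
    // Hypothetical client pass showing the analysis wiring of this era.
    struct AAClientSketch : public FunctionPass {
      static char ID;
      AAClientSketch() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.setPreservesAll();
        AU.addRequired<AliasAnalysis>();      // chains through the active AA stack
        AU.addRequired<TargetLibraryInfo>();  // now used for object-size queries
      }

      virtual bool runOnFunction(Function &F) {
        AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
        // DataLayout is optional: null when the module carries no target
        // description -- exactly the case the TD == 0 paths below guard against.
        DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
        (void)AA; (void)TD;
        return false;
      }
    };
    char AAClientSketch::ID = 0;
    }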
"must" : "may") << " alias, "; switch (AccessTy) { case NoModRef: OS << "No access "; break; @@ -590,8 +590,10 @@ void AliasSetTracker::print(raw_ostream &OS) const { OS << "\n"; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void AliasSet::dump() const { print(dbgs()); } void AliasSetTracker::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // ASTCallbackVH Class Implementation diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 0ba6af9..9dc81a6 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -26,11 +26,13 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeBasicAliasAnalysisPass(Registry); initializeBlockFrequencyInfoPass(Registry); initializeBranchProbabilityInfoPass(Registry); + initializeCostModelAnalysisPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); initializePrintDbgInfoPass(Registry); + initializeDependenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); @@ -46,7 +48,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoPass(Registry); initializeLibCallAliasAnalysisPass(Registry); initializeLintPass(Registry); - initializeLoopDependenceAnalysisPass(Registry); initializeLoopInfoPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemoryDependenceAnalysisPass(Registry); @@ -61,6 +62,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializePathProfileLoaderPassPass(Registry); initializeProfileVerifierPassPass(Registry); initializePathProfileVerifierPass(Registry); + initializeProfileMetadataLoaderPassPass(Registry); initializeRegionInfoPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 1d028c2..4bb93ee 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -29,7 +29,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -58,12 +58,12 @@ static bool isNonEscapingLocalObject(const Value *V) { // then it has not escaped before entering the function. Check if it escapes // inside the function. if (const Argument *A = dyn_cast(V)) - if (A->hasByValAttr() || A->hasNoAliasAttr()) { - // Don't bother analyzing arguments already known not to escape. - if (A->hasNoCaptureAttr()) - return true; + if (A->hasByValAttr() || A->hasNoAliasAttr()) + // Note even if the argument is marked nocapture we still need to check + // for copies made inside the function. The nocapture attribute only + // specifies that there are no copies made that outlive the function. return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); - } + return false; } @@ -84,10 +84,11 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. 
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 1d028c2..4bb93ee 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -29,7 +29,7 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -58,12 +58,12 @@ static bool isNonEscapingLocalObject(const Value *V) {
   // then it has not escaped before entering the function.  Check if it escapes
   // inside the function.
   if (const Argument *A = dyn_cast<Argument>(V))
-    if (A->hasByValAttr() || A->hasNoAliasAttr()) {
-      // Don't bother analyzing arguments already known not to escape.
-      if (A->hasNoCaptureAttr())
-        return true;
+    if (A->hasByValAttr() || A->hasNoAliasAttr())
+      // Note even if the argument is marked nocapture we still need to check
+      // for copies made inside the function. The nocapture attribute only
+      // specifies that there are no copies made that outlive the function.
       return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
-    }
+
   return false;
 }
 
@@ -84,10 +84,11 @@ static bool isEscapeSource(const Value *V) {
 
 /// getObjectSize - Return the size of the object specified by V, or
 /// UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const TargetData &TD,
+static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
+                              const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getObjectSize(V, Size, &TD, RoundToAlign))
+  if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -95,10 +96,11 @@ static uint64_t getObjectSize(const Value *V, const TargetData &TD,
 /// isObjectSmallerThan - Return true if we can prove that the object specified
 /// by V is smaller than Size.
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
-                                const TargetData &TD) {
+                                const DataLayout &TD,
+                                const TargetLibraryInfo &TLI) {
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
-  uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true);
+  uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
 
   return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
 }
@@ -106,8 +108,8 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 /// isObjectSize - Return true if we can prove that the object specified
 /// by V has size Size.
 static bool isObjectSize(const Value *V, uint64_t Size,
-                         const TargetData &TD) {
-  uint64_t ObjectSize = getObjectSize(V, TD);
+                         const DataLayout &TD, const TargetLibraryInfo &TLI) {
+  uint64_t ObjectSize = getObjectSize(V, TD, TLI);
   return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
 }
 
@@ -126,6 +128,15 @@ namespace {
     const Value *V;
     ExtensionKind Extension;
     int64_t Scale;
+
+    bool operator==(const VariableGEPIndex &Other) const {
+      return V == Other.V && Extension == Other.Extension &&
+        Scale == Other.Scale;
+    }
+
+    bool operator!=(const VariableGEPIndex &Other) const {
+      return !operator==(Other);
+    }
   };
 }
 
@@ -140,7 +151,7 @@ namespace {
 /// represented in the result.
 static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
                                   ExtensionKind &Extension,
-                                  const TargetData &TD, unsigned Depth) {
+                                  const DataLayout &TD, unsigned Depth) {
   assert(V->getType()->isIntegerTy() && "Not an integer value");
 
   // Limit our recursion depth.
@@ -215,14 +226,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
 /// specified amount, but which may have other unrepresented high bits. As such,
 /// the gep cannot necessarily be reconstructed from its decomposed form.
 ///
-/// When TargetData is around, this function is capable of analyzing everything
+/// When DataLayout is around, this function is capable of analyzing everything
 /// that GetUnderlyingObject can look through.  When not, it just looks
 /// through pointer casts.
 ///
 static const Value *
 DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       const TargetData *TD) {
+                       const DataLayout *TD) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = 6;
@@ -266,7 +277,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         ->getElementType()->isSized())
       return V;
 
-    // If we are lacking TargetData information, we can't compute the offets of
+    // If we are lacking DataLayout information, we can't compute the offsets of
     // elements computed by GEPs.  However, we can handle bitcast equivalent
     // GEPs.
     if (TD == 0) {
@@ -417,13 +428,7 @@ namespace {
   /// BasicAliasAnalysis - This is the primary alias analysis implementation.
   struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
     static char ID; // Class identification, replacement for typeinfo
-    BasicAliasAnalysis() : ImmutablePass(ID),
-                           // AliasCache rarely has more than 1 or 2 elements,
-                           // so start it off fairly small so that clear()
-                           // doesn't have to tromp through 64 (the default)
-                           // elements on each alias query. This really wants
-                           // something like a SmallDenseMap.
-                           AliasCache(8) {
+    BasicAliasAnalysis() : ImmutablePass(ID) {
       initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
 
@@ -443,7 +448,11 @@ namespace {
              "BasicAliasAnalysis doesn't support interprocedural queries.");
       AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
                                      LocB.Ptr, LocB.Size, LocB.TBAATag);
-      AliasCache.clear();
+      // AliasCache rarely has more than 1 or 2 elements, always use
+      // shrink_and_clear so it quickly returns to the inline capacity of the
+      // SmallDenseMap if it ever grows larger.
+      // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+      AliasCache.shrink_and_clear();
       return Alias;
     }
 
@@ -481,7 +490,7 @@ namespace {
   private:
     // AliasCache - Track alias queries to guard against recursion.
     typedef std::pair<Location, Location> LocPair;
-    typedef DenseMap<LocPair, AliasResult> AliasCacheTy;
+    typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
     AliasCacheTy AliasCache;
 
     // Visited - Track instructions visited by pointsToConstantMemory.
@@ -490,6 +499,7 @@ namespace {
     // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
     // instruction against another.
     AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+                         const MDNode *V1TBAAInfo,
                          const Value *V2, uint64_t V2Size,
                          const MDNode *V2TBAAInfo,
                          const Value *UnderlyingV1, const Value *UnderlyingV2);
@@ -807,6 +817,21 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
 }
 
+static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1,
+                               SmallVector<VariableGEPIndex, 4> &Indices2) {
+  unsigned Size1 = Indices1.size();
+  unsigned Size2 = Indices2.size();
+
+  if (Size1 != Size2)
+    return false;
+
+  for (unsigned I = 0; I != Size1; ++I)
+    if (Indices1[I] != Indices2[I])
+      return false;
+
+  return true;
+}
+
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
 /// against another pointer.  We know that V1 is a GEP, but we don't know
 /// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
@@ -814,6 +839,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
 ///
 AliasAnalysis::AliasResult
 BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+                             const MDNode *V1TBAAInfo,
                              const Value *V2, uint64_t V2Size,
                              const MDNode *V2TBAAInfo,
                              const Value *UnderlyingV1,
@@ -821,9 +847,41 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
   int64_t GEP1BaseOffset;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
 
-  // If we have two gep instructions with must-alias'ing base pointers, figure
-  // out if the indexes to the GEP tell us anything about the derived pointer.
+  // If we have two gep instructions with must-alias or not-alias'ing base
+  // pointers, figure out if the indexes to the GEP tell us anything about the
+  // derived pointer.
   if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+    // Check for geps of non-aliasing underlying pointers where the offsets are
+    // identical.
+    if (V1Size == V2Size) {
+      // Do the base pointers alias assuming type and size.
+      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
+                                                V1TBAAInfo, UnderlyingV2,
+                                                V2Size, V2TBAAInfo);
+      if (PreciseBaseAlias == NoAlias) {
+        // See if the computed offset from the common pointer tells us about the
+        // relation of the resulting pointer.
+        int64_t GEP2BaseOffset;
+        SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+        const Value *GEP2BasePtr =
+          DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+        const Value *GEP1BasePtr =
+          DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+        // DecomposeGEPExpression and GetUnderlyingObject should return the
+        // same result except when DecomposeGEPExpression has no DataLayout.
+        if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+          assert(TD == 0 &&
+                 "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+          return MayAlias;
+        }
+        // Same offsets.
+        if (GEP1BaseOffset == GEP2BaseOffset &&
+            areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices))
+          return NoAlias;
+        GEP1VariableIndices.clear();
+      }
+    }
+
     // Do the base pointers alias?
     AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
                                        UnderlyingV2, UnknownSize, 0);
@@ -843,9 +901,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     const Value *GEP2BasePtr =
       DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
 
-    // If DecomposeGEPExpression isn't able to look all the way through the
-    // addressing operation, we must not have TD and this is too complex for us
-    // to handle without it.
+    // DecomposeGEPExpression and GetUnderlyingObject should return the
+    // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
       assert(TD == 0 &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
@@ -879,9 +936,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     const Value *GEP1BasePtr =
       DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
 
-    // If DecomposeGEPExpression isn't able to look all the way through the
-    // addressing operation, we must not have TD and this is too complex for us
-    // to handle without it.
+    // DecomposeGEPExpression and GetUnderlyingObject should return the
+    // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1) {
       assert(TD == 0 &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
@@ -1004,12 +1060,42 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   // on corresponding edges.
   if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
     if (PN2->getParent() == PN->getParent()) {
+      LocPair Locs(Location(PN, PNSize, PNTBAAInfo),
+                   Location(V2, V2Size, V2TBAAInfo));
+      if (PN > V2)
+        std::swap(Locs.first, Locs.second);
+
       AliasResult Alias =
         aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
                    PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
                    V2Size, V2TBAAInfo);
       if (Alias == MayAlias)
         return MayAlias;
+
+      // If the first source of the PHI nodes is NoAlias and the other inputs
+      // are the PHI node itself through some amount of recursion this does not
+      // add any new information so just return NoAlias.
+      // bb:
+      //   ptr = ptr2 + 1
+      // loop:
+      //   ptr_phi = phi [bb, ptr], [loop, ptr_plus_one]
+      //   ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one]
+      //   ...
+      //   ptr_plus_one = gep ptr_phi, 1
+      //   ptr2_plus_one = gep ptr2_phi, 1
+      // We assume for the recursion that the phis (ptr_phi, ptr2_phi) do
+      // not alias each other.
+      bool ArePhisAssumedNoAlias = false;
+      AliasResult OrigAliasResult = NoAlias;
+      if (Alias == NoAlias) {
+        // Pretend the phis do not alias.
+        assert(AliasCache.count(Locs) &&
+               "There must exist an entry for the phi node");
+        OrigAliasResult = AliasCache[Locs];
+        AliasCache[Locs] = NoAlias;
+        ArePhisAssumedNoAlias = true;
+      }
 
       for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
         AliasResult ThisAlias =
           aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
@@ -1019,6 +1105,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
         if (Alias == MayAlias)
           break;
       }
 
+      // Reset if speculation failed.
+      if (ArePhisAssumedNoAlias && Alias != NoAlias)
+        AliasCache[Locs] = OrigAliasResult;
+
       return Alias;
     }
 
@@ -1133,8 +1224,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
   if (TD)
-    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
-        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
+        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
       return NoAlias;
 
   // Check the cache before climbing up use-def chains. This also terminates
@@ -1154,15 +1245,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1, V2);
     std::swap(V1Size, V2Size);
     std::swap(O1, O2);
+    std::swap(V1TBAAInfo, V2TBAAInfo);
   }
   if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
-    AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+    AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2);
     if (Result != MayAlias) return AliasCache[Locs] = Result;
   }
 
   if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
     std::swap(V1, V2);
     std::swap(V1Size, V2Size);
+    std::swap(V1TBAAInfo, V2TBAAInfo);
   }
   if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
     AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
@@ -1173,6 +1266,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
     std::swap(V1, V2);
     std::swap(V1Size, V2Size);
+    std::swap(V1TBAAInfo, V2TBAAInfo);
  }
  if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
    AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
@@ -1184,8 +1278,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   // accesses is accessing the entire object, then the accesses must
   // overlap in some way.
   if (TD && O1 == O2)
-    if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
-        (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+    if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) ||
+        (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI)))
       return AliasCache[Locs] = PartialAlias;
 
   AliasResult Result =
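The aliasPHI/aliasCheck changes above implement a general speculate-then-rollback memoization pattern: seed the cache with the optimistic NoAlias answer before recursing, so self-referential PHI queries terminate, and restore the original entry if the speculation is refuted. A minimal self-contained sketch of the pattern (plain C++; the names and the integer key are invented stand-ins for AliasCache and its LocPair key):

    #include <cassert>
    #include <map>

    enum Result { NoAlias, MayAlias };

    static std::map<int, Result> Cache; // stands in for AliasCache

    static Result queryInput(int Key, int Depth);

    static Result aliasOfPhis(int Key) {
      assert(Cache.count(Key) && "caller seeds the entry, as aliasCheck does");
      Result Orig = Cache[Key]; // remember the seeded value
      Cache[Key] = NoAlias;     // speculate: the phis do not alias

      Result R = queryInput(Key, /*Depth=*/4); // recursion consults Cache[Key]

      if (R != NoAlias)         // speculation failed: restore the old entry
        Cache[Key] = Orig;
      return R;
    }

    // Toy recursive query: it bottoms out on the speculated cache entry instead
    // of recursing forever, mirroring how aliasCheck hits AliasCache.
    static Result queryInput(int Key, int Depth) {
      if (Depth == 0)
        return Cache[Key];
      return queryInput(Key, Depth - 1);
    }

    int main() {
      Cache[1] = MayAlias; // seeded by the caller before speculation
      return aliasOfPhis(1) == NoAlias ? 0 : 1;
    }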
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index b255ce6..04a6560 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -115,14 +115,14 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
     return false;
   }
 
-  SmallPtrSet<BasicBlock *, 4> UnreachableEdges;
-  SmallPtrSet<BasicBlock *, 4> ReachableEdges;
+  SmallVector<unsigned, 4> UnreachableEdges;
+  SmallVector<unsigned, 4> ReachableEdges;
 
   for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
     if (PostDominatedByUnreachable.count(*I))
-      UnreachableEdges.insert(*I);
+      UnreachableEdges.push_back(I.getSuccessorIndex());
     else
-      ReachableEdges.insert(*I);
+      ReachableEdges.push_back(I.getSuccessorIndex());
   }
 
   // If all successors are in the set of blocks post-dominated by unreachable,
@@ -136,18 +136,19 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
     return false;
 
   uint32_t UnreachableWeight =
-    std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT);
-  for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(),
-       E = UnreachableEdges.end();
+    std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT);
+  for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(),
+       E = UnreachableEdges.end();
        I != E; ++I)
     setEdgeWeight(BB, *I, UnreachableWeight);
 
   if (ReachableEdges.empty())
     return true;
 
   uint32_t ReachableWeight =
-    std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT);
-  for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(),
-       E = ReachableEdges.end();
+    std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(),
+             NORMAL_WEIGHT);
+  for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(),
+       E = ReachableEdges.end();
        I != E; ++I)
     setEdgeWeight(BB, *I, ReachableWeight);
 
@@ -187,7 +188,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
   }
   assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
   for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
-    setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
+    setEdgeWeight(BB, i, Weights[i]);
 
   return true;
 }
@@ -211,19 +212,17 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
 
   assert(CI->getOperand(1)->getType()->isPointerTy());
 
-  BasicBlock *Taken = BI->getSuccessor(0);
-  BasicBlock *NonTaken = BI->getSuccessor(1);
-
   // p != 0   ->   isProb = true
   // p == 0   ->   isProb = false
   // p != q   ->   isProb = true
   // p == q   ->   isProb = false;
+  unsigned TakenIdx = 0, NonTakenIdx = 1;
   bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
   if (!isProb)
-    std::swap(Taken, NonTaken);
+    std::swap(TakenIdx, NonTakenIdx);
 
-  setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
-  setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
+  setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT);
+  setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT);
   return true;
 }
 
@@ -234,17 +233,17 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
   if (!L)
     return false;
 
-  SmallPtrSet<BasicBlock *, 8> BackEdges;
-  SmallPtrSet<BasicBlock *, 8> ExitingEdges;
-  SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop.
+  SmallVector<unsigned, 8> BackEdges;
+  SmallVector<unsigned, 8> ExitingEdges;
+  SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { if (!L->contains(*I)) - ExitingEdges.insert(*I); + ExitingEdges.push_back(I.getSuccessorIndex()); else if (L->getHeader() == *I) - BackEdges.insert(*I); + BackEdges.push_back(I.getSuccessorIndex()); else - InEdges.insert(*I); + InEdges.push_back(I.getSuccessorIndex()); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -252,10 +251,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (backWeight < NORMAL_WEIGHT) backWeight = NORMAL_WEIGHT; - for (SmallPtrSet::iterator EI = BackEdges.begin(), + for (SmallVector::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { - BasicBlock *Back = *EI; - setEdgeWeight(BB, Back, backWeight); + setEdgeWeight(BB, *EI, backWeight); } } @@ -264,10 +262,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (inWeight < NORMAL_WEIGHT) inWeight = NORMAL_WEIGHT; - for (SmallPtrSet::iterator EI = InEdges.begin(), + for (SmallVector::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { - BasicBlock *Back = *EI; - setEdgeWeight(BB, Back, inWeight); + setEdgeWeight(BB, *EI, inWeight); } } @@ -276,10 +273,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; - for (SmallPtrSet::iterator EI = ExitingEdges.begin(), + for (SmallVector::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { - BasicBlock *Exiting = *EI; - setEdgeWeight(BB, Exiting, exitWeight); + setEdgeWeight(BB, *EI, exitWeight); } } @@ -335,14 +331,13 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { return false; } - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); + unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); return true; } @@ -372,14 +367,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { return false; } - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); + unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); return true; } @@ -389,11 +383,8 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { if (!II) return false; - BasicBlock *Normal = II->getNormalDest(); - BasicBlock *Unwind = II->getUnwindDest(); - - setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT); - setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); + setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); return true; } @@ -450,8 +441,7 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t Sum = 0; for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - const BasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); uint32_t PrevSum = Sum; Sum += Weight; @@ -494,11 +484,13 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock 
*BB) const { return 0; } -// Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. +/// Get the raw edge weight for the edge. If can't find it, return +/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index +/// to the successors. uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { - Edge E(Src, Dst); - DenseMap::const_iterator I = Weights.find(E); +getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { + DenseMap::const_iterator I = + Weights.find(std::make_pair(Src, IndexInSuccessors)); if (I != Weights.end()) return I->second; @@ -506,15 +498,43 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { return DEFAULT_WEIGHT; } +/// Get the raw edge weight calculated for the block pair. This returns the sum +/// of all raw edge weights from Src to Dst. +uint32_t BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { + uint32_t Weight = 0; + DenseMap::const_iterator MapI; + for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) + if (*I == Dst) { + MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); + if (MapI != Weights.end()) + Weight += MapI->second; + } + return (Weight == 0) ? DEFAULT_WEIGHT : Weight; +} + +/// Set the edge weight for a given edge specified by PredBlock and an index +/// to the successors. void BranchProbabilityInfo:: -setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { - Weights[std::make_pair(Src, Dst)] = Weight; +setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, + uint32_t Weight) { + Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; DEBUG(dbgs() << "set edge " << Src->getName() << " -> " - << Dst->getName() << " weight to " << Weight - << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); + << IndexInSuccessors << " successor weight to " + << Weight << "\n"); } +/// Get an edge's probability, relative to other out-edges from Src. +BranchProbability BranchProbabilityInfo:: +getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { + uint32_t N = getEdgeWeight(Src, IndexInSuccessors); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} +/// Get the probability of going from Src to Dst. It returns the sum of all +/// probabilities for edges from Src to Dst. 
BranchProbability BranchProbabilityInfo:: getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 96e68b4..b3a40be 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -10,9 +10,11 @@ add_llvm_library(LLVMAnalysis BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp + CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp DbgInfoPrinter.cpp + DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp @@ -26,7 +28,6 @@ add_llvm_library(LLVMAnalysis LibCallSemantics.cpp Lint.cpp Loads.cpp - LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp MemDepPrinter.cpp @@ -44,6 +45,8 @@ add_llvm_library(LLVMAnalysis ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp ProfileVerifierPass.cpp + ProfileDataLoader.cpp + ProfileDataLoaderPass.cpp RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 974b906..d9c0299 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -23,6 +23,8 @@ using namespace llvm; CaptureTracker::~CaptureTracker() {} +bool CaptureTracker::shouldExplore(Use *U) { return true; } + namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) @@ -30,8 +32,6 @@ namespace { void tooManyUses() { Captured = true; } - bool shouldExplore(Use *U) { return true; } - bool captured(Use *U) { if (isa(U->getUser()) && !ReturnCaptures) return false; diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index acda34b..651a54b 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -15,7 +15,7 @@ #include "llvm/Function.h" #include "llvm/Support/CallSite.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; @@ -54,7 +54,7 @@ bool llvm::callIsSmall(ImmutableCallSite CS) { return false; } -bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) { +bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { if (isa(I)) return true; @@ -119,7 +119,7 @@ bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) { /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetData *TD) { + const DataLayout *TD) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); @@ -189,14 +189,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } -void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { +void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) { // If this function contains a call that "returns twice" (e.g., setjmp or // _setjmp) and it isn't marked with "returns twice" itself, never inline it. // This is a hack because we depend on the user marking their local variables // as volatile if they are live across a setjmp call, and they probably // won't do this in callers. exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->hasFnAttr(Attribute::ReturnsTwice); + !F->getFnAttributes().hasAttribute(Attributes::ReturnsTwice); // Look at the size of the callee. 
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index f5e619c..91a5b84 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -11,7 +11,7 @@ // // Also, to supplement the basic VMCore ConstantExpr simplifications, // this file defines some additional folding routines that can make use of -// TargetData information. These functions cannot go in VMCore due to library +// DataLayout information. These functions cannot go in VMCore due to library // dependency issues. // //===----------------------------------------------------------------------===// @@ -25,7 +25,7 @@ #include "llvm/Intrinsics.h" #include "llvm/Operator.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -41,11 +41,11 @@ using namespace llvm; // Constant Folding internal helper functions //===----------------------------------------------------------------------===// -/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with -/// TargetData. This always returns a non-null constant, but it may be a +/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with +/// DataLayout. This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. static Constant *FoldBitCast(Constant *C, Type *DestTy, - const TargetData &TD) { + const DataLayout &TD) { // Catch the obvious splat cases. if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); @@ -59,9 +59,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, return ConstantExpr::getBitCast(C, DestTy); unsigned NumSrcElts = CDV->getType()->getNumElements(); - + Type *SrcEltTy = CDV->getType()->getElementType(); - + // If the vector is a vector of floating point, convert it to vector of int // to simplify things. if (SrcEltTy->isFloatingPointTy()) { @@ -72,7 +72,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, C = ConstantExpr::getBitCast(C, SrcIVTy); CDV = cast(C); } - + // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); @@ -84,43 +84,43 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, else Result |= CDV->getElementAsInteger(i); } - + return ConstantInt::get(IT, Result); } - + // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast(DestTy); if (DestVTy == 0) return ConstantExpr::getBitCast(C, DestTy); - + // If this is a scalar -> vector cast, convert the input into a <1 x scalar> // vector so the code below can handle it uniformly. if (isa(C) || isa(C)) { Constant *Ops = C; // don't take the address of C! return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); } - + // If this is a bitcast from constant vector -> vector, fold it. if (!isa(C) && !isa(C)) return ConstantExpr::getBitCast(C, DestTy); - + // If the element types match, VMCore can fold it. 
unsigned NumDstElt = DestVTy->getNumElements(); unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - + Type *SrcEltTy = C->getType()->getVectorElementType(); Type *DstEltTy = DestVTy->getElementType(); - - // Otherwise, we're changing the number of elements in a vector, which + + // Otherwise, we're changing the number of elements in a vector, which // requires endianness information to do the right thing. For example, // bitcast (<2 x i64> to <4 x i32>) // folds to (little endian): // <4 x i32> // and to (big endian): // <4 x i32> - + // First thing is first. We only want to think about integer here, so if // we have something in FP form, recast it as integer. if (DstEltTy->isFloatingPointTy()) { @@ -130,11 +130,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - + // Finally, VMCore can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); } - + // Okay, we know the destination is integer, if the input is FP, convert // it to integer first. if (SrcEltTy->isFloatingPointTy()) { @@ -148,13 +148,13 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, !isa(C)) return C; } - + // Now we know that the input and output vectors are both integer vectors // of the same size, and that their #elements is not the same. Do the // conversion here, which depends on whether the input or output has // more elements. bool isLittleEndian = TD.isLittleEndian(); - + SmallVector Result; if (NumDstElt < NumSrcElt) { // Handle: bitcast (<4 x i32> to <2 x i64>) @@ -170,15 +170,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, Constant *Src =dyn_cast(C->getAggregateElement(SrcElt++)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); - + // Zero extend the element to the right size. Src = ConstantExpr::getZExt(Src, Elt->getType()); - + // Shift it to the right place, depending on endianness. - Src = ConstantExpr::getShl(Src, + Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; - + // Mix it in. Elt = ConstantExpr::getOr(Elt, Src); } @@ -186,30 +186,30 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } return ConstantVector::get(Result); } - + // Handle: bitcast (<2 x i64> to <4 x i32>) unsigned Ratio = NumDstElt/NumSrcElt; unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); - + // Loop over each source value, expanding into multiple results. for (unsigned i = 0; i != NumSrcElt; ++i) { Constant *Src = dyn_cast(C->getAggregateElement(i)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); - + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { // Shift the piece of the value into the right place, depending on // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, + Constant *Elt = ConstantExpr::getLShr(Src, ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - + // Truncate and remember this piece. Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); } } - + return ConstantVector::get(Result); } @@ -218,34 +218,34 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// from a global, return the global and the constant. 
Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - int64_t &Offset, const TargetData &TD) { + int64_t &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { Offset = 0; return true; } - + // Otherwise, if this isn't a constant expr, bail out. ConstantExpr *CE = dyn_cast(C); if (!CE) return false; - + // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::BitCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); - - // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) + + // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) if (CE->getOpcode() == Instruction::GetElementPtr) { // Cannot compute this if the element type of the pointer is missing size // info. if (!cast(CE->getOperand(0)->getType()) ->getElementType()->isSized()) return false; - + // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; - + // Otherwise, add any offset that our operands provide. gep_type_iterator GTI = gep_type_begin(CE); for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); @@ -253,7 +253,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, ConstantInt *CI = dyn_cast(*i); if (!CI) return false; // Index isn't a simple constant? if (CI->isZero()) continue; // Not adding anything. - + if (StructType *ST = dyn_cast(*GTI)) { // N = N + Offset Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); @@ -264,7 +264,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, } return true; } - + return false; } @@ -274,30 +274,33 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, /// the CurPtr buffer. TD is the target data. static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, unsigned BytesLeft, - const TargetData &TD) { + const DataLayout &TD) { assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && "Out of range access"); - + // If this element is zero or undefined, we can just return since *CurPtr is // zero initialized. if (isa(C) || isa(C)) return true; - + if (ConstantInt *CI = dyn_cast(C)) { if (CI->getBitWidth() > 64 || (CI->getBitWidth() & 7) != 0) return false; - + uint64_t Val = CI->getZExtValue(); unsigned IntBytes = unsigned(CI->getBitWidth()/8); - + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { - CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + int n = ByteOffset; + if (!TD.isLittleEndian()) + n = IntBytes - n - 1; + CurPtr[i] = (unsigned char)(Val >> (n * 8)); ++ByteOffset; } return true; } - + if (ConstantFP *CFP = dyn_cast(C)) { if (CFP->getType()->isDoubleTy()) { C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); @@ -309,13 +312,13 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return false; } - + if (ConstantStruct *CS = dyn_cast(C)) { const StructLayout *SL = TD.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); uint64_t CurEltOffset = SL->getElementOffset(Index); ByteOffset -= CurEltOffset; - + while (1) { // If the element access is to the element itself and not to tail padding, // read the bytes from the element. 
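The hunk above is where ReadDataFromGlobal learns to respect the target's byte order when serializing an integer constant into the CurPtr buffer, instead of assuming little endian. A standalone sketch of just that indexing rule (plain C++; byteAt is an invented helper, not from the patch):

    #include <cassert>
    #include <stdint.h>

    // Which source byte of an IntBytes-wide value lands at offset ByteOffset?
    // Mirrors the patch: n = ByteOffset on little endian, mirrored on big endian.
    static unsigned char byteAt(uint64_t Val, unsigned IntBytes,
                                unsigned ByteOffset, bool LittleEndian) {
      unsigned n = LittleEndian ? ByteOffset : IntBytes - ByteOffset - 1;
      return (unsigned char)(Val >> (n * 8));
    }

    int main() {
      // 0x11223344 as 4 bytes: little endian stores 44 33 22 11,
      // big endian stores 11 22 33 44.
      assert(byteAt(0x11223344u, 4, 0, true) == 0x44);
      assert(byteAt(0x11223344u, 4, 3, true) == 0x11);
      assert(byteAt(0x11223344u, 4, 0, false) == 0x11);
      assert(byteAt(0x11223344u, 4, 3, false) == 0x44);
      return 0;
    }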
@@ -325,9 +328,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, BytesLeft, TD)) return false; - + ++Index; - + // Check to see if we read from the last struct element, if so we're done. if (Index == CS->getType()->getNumElements()) return true; @@ -375,11 +378,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return true; } - + if (ConstantExpr *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr && - CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) - return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, BytesLeft, TD); } @@ -388,10 +391,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, - const TargetData &TD) { + const DataLayout &TD) { Type *LoadTy = cast(C->getType())->getElementType(); IntegerType *IntType = dyn_cast(LoadTy); - + // If this isn't an integer load we can't fold it directly. if (!IntType) { // If this is a float/double load, we can try folding it as an int32/64 load @@ -415,15 +418,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, return FoldBitCast(Res, LoadTy, TD); return 0; } - + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; if (BytesLoaded > 32 || BytesLoaded == 0) return 0; - + GlobalValue *GVal; int64_t Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; - + GlobalVariable *GV = dyn_cast(GVal); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || !GV->getInitializer()->getType()->isSized()) @@ -432,20 +435,29 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. if (Offset < 0) return 0; - + // If we're not accessing anything in this constant, the result is undefined. if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); - + unsigned char RawBytes[32] = {0}; if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, BytesLoaded, TD)) return 0; - APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); - for (unsigned i = 1; i != BytesLoaded; ++i) { - ResultVal <<= 8; - ResultVal |= RawBytes[BytesLoaded-1-i]; + APInt ResultVal = APInt(IntType->getBitWidth(), 0); + if (TD.isLittleEndian()) { + ResultVal = RawBytes[BytesLoaded - 1]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + } else { + ResultVal = RawBytes[0]; + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[i]; + } } return ConstantInt::get(IntType->getContext(), ResultVal); @@ -455,7 +467,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, /// produce if it is constant and determinable. If this is not determinable, /// return null. Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, - const TargetData *TD) { + const DataLayout *TD) { // First, try the easy cases: if (GlobalVariable *GV = dyn_cast(C)) if (GV->isConstant() && GV->hasDefinitiveInitializer()) @@ -464,15 +476,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. 
ConstantExpr *CE = dyn_cast(C); if (!CE) return 0; - + if (CE->getOpcode() == Instruction::GetElementPtr) { if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) if (GV->isConstant() && GV->hasDefinitiveInitializer()) - if (Constant *V = + if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) return V; } - + // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. StringRef Str; @@ -500,14 +512,14 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, SingleChar = 0; StrVal = (StrVal << 8) | SingleChar; } - + Constant *Res = ConstantInt::get(CE->getContext(), StrVal); if (Ty->isFloatingPointTy()) Res = ConstantExpr::getBitCast(Res, Ty); return Res; } } - + // If this load comes from anywhere in a constant global, and if the global // is all undef or zero, we know what it loads. if (GlobalVariable *GV = @@ -520,18 +532,16 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, return UndefValue::get(ResTy); } } - - // Try hard to fold loads from bitcasted strange and non-type-safe things. We - // currently don't do any of this for big endian systems. It can be - // generalized in the future if someone is interested. - if (TD && TD->isLittleEndian()) + + // Try hard to fold loads from bitcasted strange and non-type-safe things. + if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); return 0; } -static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){ +static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ if (LI->isVolatile()) return 0; - + if (Constant *C = dyn_cast(LI->getOperand(0))) return ConstantFoldLoadFromConstPtr(C, TD); @@ -540,23 +550,23 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, +/// these together. If target data info is available, it is provided as TD, /// otherwise TD is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const TargetData *TD){ + Constant *Op1, const DataLayout *TD){ // SROA - + // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute // bits. - - + + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. if (Opc == Instruction::Sub && TD) { GlobalValue *GV1, *GV2; int64_t Offs1, Offs2; - + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && GV1 == GV2) { @@ -564,7 +574,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, return ConstantInt::get(Op0->getType(), Offs1-Offs2); } } - + return 0; } @@ -572,7 +582,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// explicitly cast them so that they aren't implicitly casted by the /// getelementptr. 
static Constant *CastGEPIndices(ArrayRef Ops, - Type *ResultTy, const TargetData *TD, + Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TD) return 0; Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); @@ -622,20 +632,20 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, - Type *ResultTy, const TargetData *TD, + Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; if (!TD || !cast(Ptr->getType())->getElementType()->isSized() || !Ptr->getType()->isPointerTy()) return 0; - + Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1, e = Ops.size(); i != e; ++i) if (!isa(Ops[i])) { - + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && @@ -659,7 +669,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), - makeArrayRef((Value **)Ops.data() + 1, + makeArrayRef((Value *const*) + Ops.data() + 1, Ops.size() - 1))); Ptr = StripPtrCastKeepAS(Ptr); @@ -708,12 +719,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // The only pointer indexing we'll do is on the first index of the GEP. if (!NewIdxs.empty()) break; - + // Only handle pointers to sized types, not pointers to functions. if (!ATy->getElementType()->isSized()) return 0; } - + // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); @@ -785,7 +796,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. Constant *llvm::ConstantFoldInstruction(Instruction *I, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast(I)) { @@ -836,7 +847,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, if (const CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], TD, TLI); - + if (const LoadInst *LI = dyn_cast(I)) return ConstantFoldLoadInst(LI, TD); @@ -855,10 +866,10 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, } /// ConstantFoldConstantExpression - Attempt to fold the constant expression -/// using the specified TargetData. If successful, the constant result is +/// using the specified DataLayout. If successful, the constant result is /// result is returned, if not, null is returned. Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI) { SmallVector Ops; for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); @@ -886,19 +897,19 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, /// information, due to only being passed an opcode and operands. Constant /// folding using this function strips this information. 
/// -Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef Ops, - const TargetData *TD, - const TargetLibraryInfo *TLI) { + const DataLayout *TD, + const TargetLibraryInfo *TLI) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa(Ops[0]) || isa(Ops[1])) if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) return C; - + return ConstantExpr::get(Opcode, Ops[0], Ops[1]); } - + switch (Opcode) { default: return 0; case Instruction::ICmp: @@ -916,7 +927,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, Constant *Input = CE->getOperand(0); unsigned InWidth = Input->getType()->getScalarSizeInBits(); if (TD->getPointerSizeInBits() < InWidth) { - Constant *Mask = + Constant *Mask = ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, TD->getPointerSizeInBits())); Input = ConstantExpr::getAnd(Input, Mask); @@ -964,7 +975,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return C; if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI)) return C; - + return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); } } @@ -974,8 +985,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, /// returns a constant expression of the specified operands. /// Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, - Constant *Ops0, Constant *Ops1, - const TargetData *TD, + Constant *Ops0, Constant *Ops1, + const DataLayout *TD, const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -995,17 +1006,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *Null = Constant::getNullValue(C->getType()); return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } - + // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. - if (CE0->getOpcode() == Instruction::PtrToInt && + if (CE0->getOpcode() == Instruction::PtrToInt && CE0->getType() == IntPtrTy) { Constant *C = CE0->getOperand(0); Constant *Null = Constant::getNullValue(C->getType()); return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } } - + if (ConstantExpr *CE1 = dyn_cast(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); @@ -1029,24 +1040,24 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, CE1->getOperand(0), TD, TLI); } } - + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { - Constant *LHS = + Constant *LHS = ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1, TD, TLI); - Constant *RHS = + Constant *RHS = ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1, TD, TLI); - unsigned OpC = + unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI); } } - + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); } @@ -1054,7 +1065,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. -Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, +Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) return 0; // Do not allow stepping over the value! @@ -1124,14 +1135,14 @@ llvm::canConstantFoldCallTo(const Function *F) { if (!F->hasName()) return false; StringRef Name = F->getName(); - + // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || + return Name == "acos" || Name == "asin" || Name == "atan" || Name == "atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; @@ -1151,7 +1162,7 @@ llvm::canConstantFoldCallTo(const Function *F) { } } -static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, Type *Ty) { sys::llvm_fenv_clearexcept(); V = NativeFP(V); @@ -1159,7 +1170,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, sys::llvm_fenv_clearexcept(); return 0; } - + if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) @@ -1175,7 +1186,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), sys::llvm_fenv_clearexcept(); return 0; } - + if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) @@ -1269,7 +1280,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, case 'e': if (Name == "exp" && TLI->has(LibFunc::exp)) return ConstantFoldFP(exp, V, Ty); - + if (Name == "exp2" && TLI->has(LibFunc::exp2)) { // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. @@ -1345,7 +1356,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, } // Support ConstantVector in case we have an Undef in the top. - if (isa(Operands[0]) || + if (isa(Operands[0]) || isa(Operands[0])) { Constant *Op = cast(Operands[0]); switch (F->getIntrinsicID()) { @@ -1364,11 +1375,11 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, case Intrinsic::x86_sse2_cvttsd2si64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) - return ConstantFoldConvertToInt(FPOp->getValueAPF(), + return ConstantFoldConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty); } } - + if (isa(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::bswap) return Operands[0]; @@ -1382,14 +1393,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (ConstantFP *Op1 = dyn_cast(Operands[0])) { if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? + double Op1V = Ty->isFloatTy() ? 
(double)Op1->getValueAPF().convertToFloat() : Op1->getValueAPF().convertToDouble(); if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? + double Op2V = Ty->isFloatTy() ? (double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); @@ -1416,7 +1427,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, } return 0; } - + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { switch (F->getIntrinsicID()) { @@ -1466,7 +1477,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } - + return 0; } return 0; diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp new file mode 100644 index 0000000..5adbf45 --- /dev/null +++ b/lib/Analysis/CostModel.cpp @@ -0,0 +1,193 @@ +//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the cost model analysis. It provides a very basic cost +// estimation for LLVM-IR. The cost result can be thought of as cycles, but it +// is really unit-less. The estimated cost is meant to be used for comparing +// alternatives. +// +//===----------------------------------------------------------------------===// + +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME +#include "llvm/Analysis/Passes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/TargetTransformInfo.h" +#include "llvm/Value.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class CostModelAnalysis : public FunctionPass { + + public: + static char ID; // Class identification, replacement for typeinfo + CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) { + initializeCostModelAnalysisPass( + *PassRegistry::getPassRegistry()); + } + + /// Returns the expected cost of the instruction. + /// Returns -1 if the cost is unknown. + /// Note, this method does not cache the cost calculation and it + /// can be expensive in some cases. + unsigned getInstructionCost(Instruction *I) const; + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual void print(raw_ostream &OS, const Module*) const; + + /// The function that we analyze. + Function *F; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; + }; +} // End of anonymous namespace + +// Register this pass. +char CostModelAnalysis::ID = 0; +static const char cm_name[] = "Cost Model Analysis"; +INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) +INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) + +FunctionPass *llvm::createCostModelAnalysisPass() { + return new CostModelAnalysis(); +} + +void +CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +bool +CostModelAnalysis::runOnFunction(Function &F) { + this->F = &F; + + // Target information.
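+ // (Note: the TargetTransformInfo pass may be absent, e.g. when no
+ // target is registered; in that case VTTI stays null and
+ // getInstructionCost() below answers -1, i.e. "unknown", for every
+ // instruction.)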
+ TargetTransformInfo *TTI; + TTI = getAnalysisIfAvailable(); + if (TTI) + VTTI = TTI->getVectorTargetTransformInfo(); + + return false; +} + +unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { + if (!VTTI) + return -1; + + switch (I->getOpcode()) { + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return VTTI->getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); + } + case Instruction::Select: { + SelectInst *SI = cast(I); + Type *CondTy = SI->getCondition()->getType(); + return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + } + case Instruction::Store: { + StoreInst *SI = cast(I); + Type *ValTy = SI->getValueOperand()->getType(); + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast(I); + return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + } + case Instruction::ExtractElement: { + ExtractElementInst * EEI = cast(I); + ConstantInt *CI = dyn_cast(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return VTTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); + } + case Instruction::InsertElement: { + InsertElementInst * IE = cast(I); + ConstantInt *CI = dyn_cast(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return VTTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } + default: + // We don't have any information on this instruction. 
+ return -1; + } +} + +void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { + if (!F) + return; + + for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { + for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { + Instruction *Inst = it; + unsigned Cost = getInstructionCost(Inst); + if (Cost != (unsigned)-1) + OS << "Cost Model: Found an estimated cost of " << Cost; + else + OS << "Cost Model: Unknown cost"; + + OS << " for instruction: "<< *Inst << "\n"; + } + } +} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp new file mode 100644 index 0000000..95ac5ea --- /dev/null +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -0,0 +1,3786 @@ +//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// DependenceAnalysis is an LLVM pass that analyses dependences between memory +// accesses. Currently, it is an (incomplete) implementation of the approach +// described in +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// There's a single entry point that analyzes the dependence between a pair +// of memory references in a function, returning either NULL, for no dependence, +// or a more-or-less detailed description of the dependence between them. +// +// Currently, the implementation cannot propagate constraints between +// coupled RDIV subscripts and lacks a multi-subscript MIV test. +// Both of these are conservative weaknesses; +// that is, not a source of correctness problems. +// +// The implementation depends on the GEP instruction to +// differentiate subscripts. Since Clang linearizes subscripts +// for most arrays, we give up some precision (though the existing MIV tests +// will help). We trust that the GEP instruction will eventually be extended. +// In the meantime, we should explore Maslov's ideas about delinearization. +// +// We should pay some careful attention to the possibility of integer overflow +// in the implementation of the various tests. This could happen with Add, +// Subtract, or Multiply, with both APInt's and SCEV's. +// +// Some non-linear subscript pairs can be handled by the GCD test +// (and perhaps other tests). +// Should explore how often these things occur. +// +// Finally, it seems like certain test cases expose weaknesses in the SCEV +// simplification, especially in the handling of sign and zero extensions. +// It could be useful to spend time exploring these. +// +// Please note that this is work in progress and the interface is subject to +// change. 
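+//
+// As a concrete illustration (a standard textbook example, not specific
+// to this implementation): in
+//
+//     for (i = 0; i < n; i++)
+//       A[i + 2] = A[i] + 1;
+//
+// the store to A[i+2] and the load of A[i] have a flow dependence
+// carried by the i-loop, with distance 2 and direction <.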
+// +//===----------------------------------------------------------------------===// +// // +// In memory of Ken Kennedy, 1945 - 2007 // +// // +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "da" + +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// statistics + +STATISTIC(TotalArrayPairs, "Array pairs tested"); +STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); +STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); +STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); +STATISTIC(ZIVapplications, "ZIV applications"); +STATISTIC(ZIVindependence, "ZIV independence"); +STATISTIC(StrongSIVapplications, "Strong SIV applications"); +STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); +STATISTIC(StrongSIVindependence, "Strong SIV independence"); +STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); +STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); +STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); +STATISTIC(ExactSIVapplications, "Exact SIV applications"); +STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); +STATISTIC(ExactSIVindependence, "Exact SIV independence"); +STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); +STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); +STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); +STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); +STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); +STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); +STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); +STATISTIC(DeltaApplications, "Delta applications"); +STATISTIC(DeltaSuccesses, "Delta successes"); +STATISTIC(DeltaIndependence, "Delta independence"); +STATISTIC(DeltaPropagations, "Delta propagations"); +STATISTIC(GCDapplications, "GCD applications"); +STATISTIC(GCDsuccesses, "GCD successes"); +STATISTIC(GCDindependence, "GCD independence"); +STATISTIC(BanerjeeApplications, "Banerjee applications"); +STATISTIC(BanerjeeIndependence, "Banerjee independence"); +STATISTIC(BanerjeeSuccesses, "Banerjee successes"); + +//===----------------------------------------------------------------------===// +// basics + +INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", + "Dependence Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(DependenceAnalysis, "da", + "Dependence Analysis", true, true) + +char DependenceAnalysis::ID = 0; + + +FunctionPass *llvm::createDependenceAnalysisPass() { + return new DependenceAnalysis(); +} + + +bool DependenceAnalysis::runOnFunction(Function &F) { + this->F = &F; + AA = &getAnalysis(); + SE = &getAnalysis(); + LI = &getAnalysis(); + return false; +} + + +void DependenceAnalysis::releaseMemory() { +} + + +void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) 
const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<ScalarEvolution>(); + AU.addRequiredTransitive<LoopInfo>(); +} + + +// Used to test the dependence analyzer. +// Looks through the function, noting the first store instruction +// and the first load instruction +// (which always follows the first store in our tests). +// Calls depends() and prints out the result. +// Ignores all other instructions. +static +void dumpExampleDependence(raw_ostream &OS, Function *F, + DependenceAnalysis *DA) { + for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); + SrcI != SrcE; ++SrcI) { + if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) { + for (inst_iterator DstI = SrcI, DstE = inst_end(F); + DstI != DstE; ++DstI) { + if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) { + OS << "da analyze - "; + if (Dependence *D = DA->depends(Src, Dst, true)) { + D->dump(OS); + for (unsigned Level = 1; Level <= D->getLevels(); Level++) { + if (D->isSplitable(Level)) { + OS << "da analyze - split level = " << Level; + OS << ", iteration = " << *DA->getSplitIteration(D, Level); + OS << "!\n"; + } + } + delete D; + } + else + OS << "none!\n"; + return; + } + } + } + } +} + + +void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { + dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); +} + +//===----------------------------------------------------------------------===// +// Dependence methods + +// Returns true if this is an input dependence. +bool Dependence::isInput() const { + return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); +} + + +// Returns true if this is an output dependence. +bool Dependence::isOutput() const { + return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); +} + + +// Returns true if this is a flow (aka true) dependence. +bool Dependence::isFlow() const { + return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); +} + + +// Returns true if this is an anti dependence. +bool Dependence::isAnti() const { + return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); +} + + +// Returns true if a particular level is scalar; that is, +// if no subscript in the source or destination mentions the induction +// variable associated with the loop at this level. +// Leave this out of line, so it will serve as a virtual method anchor +bool Dependence::isScalar(unsigned level) const { + return false; +} + + +//===----------------------------------------------------------------------===// +// FullDependence methods + +FullDependence::FullDependence(const Instruction *Source, + const Instruction *Destination, + bool PossiblyLoopIndependent, + unsigned CommonLevels) : + Dependence(Source, Destination), + Levels(CommonLevels), + LoopIndependent(PossiblyLoopIndependent) { + Consistent = true; + DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; +} + +// The rest are simple getters that hide the implementation. + +// getDirection - Returns the direction associated with a particular level. +unsigned FullDependence::getDirection(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Direction; +} + + +// Returns the distance (or NULL) associated with a particular level. +const SCEV *FullDependence::getDistance(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Distance; +} + + +// Returns true if a particular level is scalar; that is, +// if no subscript in the source or destination mentions the induction +// variable associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Scalar; +} + + +// Returns true if peeling the first iteration from this loop +// will break this dependence. +bool FullDependence::isPeelFirst(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].PeelFirst; +} + + +// Returns true if peeling the last iteration from this loop +// will break this dependence. +bool FullDependence::isPeelLast(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].PeelLast; +} + + +// Returns true if splitting this loop will break the dependence. +bool FullDependence::isSplitable(unsigned Level) const { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1].Splitable; +} + + +//===----------------------------------------------------------------------===// +// DependenceAnalysis::Constraint methods + +// If constraint is a point <X, Y>, returns X. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getX() const { + assert(Kind == Point && "Kind should be Point"); + return A; +} + + +// If constraint is a point <X, Y>, returns Y. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getY() const { + assert(Kind == Point && "Kind should be Point"); + return B; +} + + +// If constraint is a line AX + BY = C, returns A. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getA() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return A; +} + + +// If constraint is a line AX + BY = C, returns B. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getB() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return B; +} + + +// If constraint is a line AX + BY = C, returns C. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getC() const { + assert((Kind == Line || Kind == Distance) && + "Kind should be Line (or Distance)"); + return C; +} + + +// If constraint is a distance, returns D. +// Otherwise assert. +const SCEV *DependenceAnalysis::Constraint::getD() const { + assert(Kind == Distance && "Kind should be Distance"); + return SE->getNegativeSCEV(C); +} + + +// Returns the loop associated with this constraint. +const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { + assert((Kind == Distance || Kind == Line || Kind == Point) && + "Kind should be Distance, Line, or Point"); + return AssociatedLoop; +} + + +void DependenceAnalysis::Constraint::setPoint(const SCEV *X, + const SCEV *Y, + const Loop *CurLoop) { + Kind = Point; + A = X; + B = Y; + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setLine(const SCEV *AA, + const SCEV *BB, + const SCEV *CC, + const Loop *CurLoop) { + Kind = Line; + A = AA; + B = BB; + C = CC; + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setDistance(const SCEV *D, + const Loop *CurLoop) { + Kind = Distance; + A = SE->getConstant(D->getType(), 1); + B = SE->getNegativeSCEV(A); + C = SE->getNegativeSCEV(D); + AssociatedLoop = CurLoop; +} + + +void DependenceAnalysis::Constraint::setEmpty() { + Kind = Empty; +} + + +void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { + SE = NewSE; + Kind = Any; +} + + +// For debugging purposes. Dumps the constraint out to OS.
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { + if (isEmpty()) + OS << " Empty\n"; + else if (isAny()) + OS << " Any\n"; + else if (isPoint()) + OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; + else if (isDistance()) + OS << " Distance is " << *getD() << + " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; + else if (isLine()) + OS << " Line is " << *getA() << "*X + " << + *getB() << "*Y = " << *getC() << "\n"; + else + llvm_unreachable("unknown constraint type in Constraint::dump"); +} + + +// Updates X with the intersection +// of the Constraints X and Y. Returns true if X has changed. +// Corresponds to Figure 4 from the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +bool DependenceAnalysis::intersectConstraints(Constraint *X, + const Constraint *Y) { + ++DeltaApplications; + DEBUG(dbgs() << "\tintersect constraints\n"); + DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); + DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); + assert(!Y->isPoint() && "Y must not be a Point"); + if (X->isAny()) { + if (Y->isAny()) + return false; + *X = *Y; + return true; + } + if (X->isEmpty()) + return false; + if (Y->isEmpty()) { + X->setEmpty(); + return true; + } + + if (X->isDistance() && Y->isDistance()) { + DEBUG(dbgs() << "\t intersect 2 distances\n"); + if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + // Hmmm, interesting situation. + // I guess if either is constant, keep it and ignore the other. + if (isa(Y->getD())) { + *X = *Y; + return true; + } + return false; + } + + // At this point, the pseudo-code in Figure 4 of the paper + // checks if (X->isPoint() && Y->isPoint()). + // This case can't occur in our implementation, + // since a Point can only arise as the result of intersecting + // two Line constraints, and the right-hand value, Y, is never + // the result of an intersection. 
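+
+  // Worked example with illustrative numbers: intersecting the lines
+  // 2*X + 3*Y = 12 and 1*X + 1*Y = 5 in the line/line case below yields
+  // X = 3, Y = 2; both quotients are nonnegative integers, so the pair
+  // collapses to the Point <3, 2>.  A fractional or negative solution
+  // would instead prove the constraints incompatible, yielding Empty.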
+ assert(!(X->isPoint() && Y->isPoint()) && + "We shouldn't ever see X->isPoint() && Y->isPoint()"); + + if (X->isLine() && Y->isLine()) { + DEBUG(dbgs() << "\t intersect 2 lines\n"); + const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); + const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); + if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { + // slopes are equal, so lines are parallel + DEBUG(dbgs() << "\t\tsame slope\n"); + Prod1 = SE->getMulExpr(X->getC(), Y->getB()); + Prod2 = SE->getMulExpr(X->getB(), Y->getC()); + if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + return false; + } + if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { + // slopes differ, so lines intersect + DEBUG(dbgs() << "\t\tdifferent slopes\n"); + const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); + const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); + const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); + const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); + const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); + const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); + const SCEVConstant *C1A2_C2A1 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); + const SCEVConstant *C1B2_C2B1 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); + const SCEVConstant *A1B2_A2B1 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); + const SCEVConstant *A2B1_A1B2 = + dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); + if (!C1B2_C2B1 || !C1A2_C2A1 || + !A1B2_A2B1 || !A2B1_A1B2) + return false; + APInt Xtop = C1B2_C2B1->getValue()->getValue(); + APInt Xbot = A1B2_A2B1->getValue()->getValue(); + APInt Ytop = C1A2_C2A1->getValue()->getValue(); + APInt Ybot = A2B1_A1B2->getValue()->getValue(); + DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); + DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); + DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); + DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); + APInt Xq = Xtop; // these need to be initialized, even + APInt Xr = Xtop; // though they're just going to be overwritten + APInt::sdivrem(Xtop, Xbot, Xq, Xr); + APInt Yq = Ytop; + APInt Yr = Ytop; + APInt::sdivrem(Ytop, Ybot, Yq, Yr); + if (Xr != 0 || Yr != 0) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); + if (Xq.slt(0) || Yq.slt(0)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + if (const SCEVConstant *CUB = + collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { + APInt UpperBound = CUB->getValue()->getValue(); + DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); + if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + } + X->setPoint(SE->getConstant(Xq), + SE->getConstant(Yq), + X->getAssociatedLoop()); + ++DeltaSuccesses; + return true; + } + return false; + } + + // if (X->isLine() && Y->isPoint()) This case can't occur.
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); + + if (X->isPoint() && Y->isLine()) { + DEBUG(dbgs() << "\t intersect Point and Line\n"); + const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); + const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); + const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); + if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) + return false; + if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { + X->setEmpty(); + ++DeltaSuccesses; + return true; + } + return false; + } + + llvm_unreachable("shouldn't reach the end of Constraint intersection"); + return false; +} + + +//===----------------------------------------------------------------------===// +// DependenceAnalysis methods + +// For debugging purposes. Dumps a dependence to OS. +void Dependence::dump(raw_ostream &OS) const { + bool Splitable = false; + if (isConfused()) + OS << "confused"; + else { + if (isConsistent()) + OS << "consistent "; + if (isFlow()) + OS << "flow"; + else if (isOutput()) + OS << "output"; + else if (isAnti()) + OS << "anti"; + else if (isInput()) + OS << "input"; + unsigned Levels = getLevels(); + if (Levels) { + OS << " ["; + for (unsigned II = 1; II <= Levels; ++II) { + if (isSplitable(II)) + Splitable = true; + if (isPeelFirst(II)) + OS << 'p'; + const SCEV *Distance = getDistance(II); + if (Distance) + OS << *Distance; + else if (isScalar(II)) + OS << "S"; + else { + unsigned Direction = getDirection(II); + if (Direction == DVEntry::ALL) + OS << "*"; + else { + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; + } + } + if (isPeelLast(II)) + OS << 'p'; + if (II < Levels) + OS << " "; + } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; + } + } + OS << "!\n"; +} + + + +static +AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *AObj = GetUnderlyingObject(A); + const Value *BObj = GetUnderlyingObject(B); + return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), + BObj, AA->getTypeStoreSize(BObj->getType())); +} + + +// Returns true if the load or store can be analyzed. Atomic and volatile +// operations have properties which this analysis does not understand. +static +bool isLoadOrStore(const Instruction *I) { + if (const LoadInst *LI = dyn_cast(I)) + return LI->isUnordered(); + else if (const StoreInst *SI = dyn_cast(I)) + return SI->isUnordered(); + return false; +} + + +static +const Value *getPointerOperand(const Instruction *I) { + if (const LoadInst *LI = dyn_cast(I)) + return LI->getPointerOperand(); + if (const StoreInst *SI = dyn_cast(I)) + return SI->getPointerOperand(); + llvm_unreachable("Value is not load or store instruction"); + return 0; +} + + +// Examines the loop nesting of the Src and Dst +// instructions and establishes their shared loops. Sets the variables +// CommonLevels, SrcLevels, and MaxLevels. +// The source and destination instructions needn't be contained in the same +// loop. The routine establishNestingLevels finds the level of most deeply +// nested loop that contains them both, CommonLevels. An instruction that's +// not contained in a loop is at level = 0. MaxLevels is equal to the level +// of the source plus the level of the destination, minus CommonLevels. +// This lets us allocate vectors MaxLevels in length, with room for every +// distinct loop referenced in both the source and destination subscripts. 
+// The variable SrcLevels is the nesting depth of the source instruction. +// It's used to help calculate distinct loops referenced by the destination. +// Here's the map from loops to levels: +// 0 - unused +// 1 - outermost common loop +// ... - other common loops +// CommonLevels - innermost common loop +// ... - loops containing Src but not Dst +// SrcLevels - innermost loop containing Src but not Dst +// ... - loops containing Dst but not Src +// MaxLevels - innermost loop containing Dst but not Src +// Consider the following code fragment: +// for (a = ...) { +// for (b = ...) { +// for (c = ...) { +// for (d = ...) { +// A[] = ...; +// } +// } +// for (e = ...) { +// for (f = ...) { +// for (g = ...) { +// ... = A[]; +// } +// } +// } +// } +// } +// If we're looking at the possibility of a dependence between the store +// to A (the Src) and the load from A (the Dst), we'll note that they +// have 2 loops in common, so CommonLevels will equal 2 and the direction +// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. +// A map from loop names to loop numbers would look like +// a - 1 +// b - 2 = CommonLevels +// c - 3 +// d - 4 = SrcLevels +// e - 5 +// f - 6 +// g - 7 = MaxLevels +void DependenceAnalysis::establishNestingLevels(const Instruction *Src, + const Instruction *Dst) { + const BasicBlock *SrcBlock = Src->getParent(); + const BasicBlock *DstBlock = Dst->getParent(); + unsigned SrcLevel = LI->getLoopDepth(SrcBlock); + unsigned DstLevel = LI->getLoopDepth(DstBlock); + const Loop *SrcLoop = LI->getLoopFor(SrcBlock); + const Loop *DstLoop = LI->getLoopFor(DstBlock); + SrcLevels = SrcLevel; + MaxLevels = SrcLevel + DstLevel; + while (SrcLevel > DstLevel) { + SrcLoop = SrcLoop->getParentLoop(); + SrcLevel--; + } + while (DstLevel > SrcLevel) { + DstLoop = DstLoop->getParentLoop(); + DstLevel--; + } + while (SrcLoop != DstLoop) { + SrcLoop = SrcLoop->getParentLoop(); + DstLoop = DstLoop->getParentLoop(); + SrcLevel--; + } + CommonLevels = SrcLevel; + MaxLevels -= CommonLevels; +} + + +// Given one of the loops containing the source, return +// its level index in our numbering scheme. +unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { + return SrcLoop->getLoopDepth(); +} + + +// Given one of the loops containing the destination, +// return its level index in our numbering scheme. +unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { + unsigned D = DstLoop->getLoopDepth(); + if (D > CommonLevels) + return D - CommonLevels + SrcLevels; + else + return D; +} + + +// Returns true if Expression is loop invariant in LoopNest. +bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, + const Loop *LoopNest) const { + if (!LoopNest) + return true; + return SE->isLoopInvariant(Expression, LoopNest) && + isLoopInvariant(Expression, LoopNest->getParentLoop()); +} + + + +// Finds the set of loops from the LoopNest that +// have a level <= CommonLevels and are referred to by the SCEV Expression. +void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, + const Loop *LoopNest, + SmallBitVector &Loops) const { + while (LoopNest) { + unsigned Level = LoopNest->getLoopDepth(); + if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) + Loops.set(Level); + LoopNest = LoopNest->getParentLoop(); + } +} + + +// removeMatchingExtensions - Examines a subscript pair.
+// If the source and destination are identically sign (or zero) +// extended, it strips off the extension in an effort to simplify +// the actual analysis. +void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { + const SCEV *Src = Pair->Src; + const SCEV *Dst = Pair->Dst; + if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || + (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { + const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); + const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); + if (SrcCast->getType() == DstCast->getType()) { + Pair->Src = SrcCast->getOperand(); + Pair->Dst = DstCast->getOperand(); + } + } +} + + +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, + const Loop *LoopNest, + SmallBitVector &Loops) { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); + if (!AddRec) + return isLoopInvariant(Src, LoopNest); + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + if (!isLoopInvariant(Step, LoopNest)) + return false; + Loops.set(mapSrcLoop(AddRec->getLoop())); + return checkSrcSubscript(Start, LoopNest, Loops); +} + + + +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, + const Loop *LoopNest, + SmallBitVector &Loops) { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); + if (!AddRec) + return isLoopInvariant(Dst, LoopNest); + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*SE); + if (!isLoopInvariant(Step, LoopNest)) + return false; + Loops.set(mapDstLoop(AddRec->getLoop())); + return checkDstSubscript(Start, LoopNest, Loops); +} + + +// Examines the subscript pair (the Src and Dst SCEVs) +// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. +// Collects the associated loops in a set. +DependenceAnalysis::Subscript::ClassificationKind +DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, + const SCEV *Dst, const Loop *DstLoopNest, + SmallBitVector &Loops) { + SmallBitVector SrcLoops(MaxLevels + 1); + SmallBitVector DstLoops(MaxLevels + 1); + if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) + return Subscript::NonLinear; + if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) + return Subscript::NonLinear; + Loops = SrcLoops; + Loops |= DstLoops; + unsigned N = Loops.count(); + if (N == 0) + return Subscript::ZIV; + if (N == 1) + return Subscript::SIV; + if (N == 2 && (SrcLoops.count() == 0 || + DstLoops.count() == 0 || + (SrcLoops.count() == 1 && DstLoops.count() == 1))) + return Subscript::RDIV; + return Subscript::MIV; +} + + +// A wrapper around SCEV::isKnownPredicate. +// Looks for cases where we're interested in comparing for equality. +// If both X and Y have been identically sign or zero extended, +// it strips off the (confusing) extensions before invoking +// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package +// will be similarly updated. +// +// If SCEV::isKnownPredicate can't prove the predicate, +// we try simple subtraction, which seems to help in some cases +// involving symbolics.
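+//
+// For example (illustrative): to decide ICMP_SGT for X = n + 2 and
+// Y = n, any matching extensions are stripped first; if
+// SE->isKnownPredicate still can't prove the relation directly,
+// Delta = X - Y = 2 and isKnownPositive(Delta) settles the question.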
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *X, + const SCEV *Y) const { + if (Pred == CmpInst::ICMP_EQ || + Pred == CmpInst::ICMP_NE) { + if ((isa(X) && + isa(Y)) || + (isa(X) && + isa(Y))) { + const SCEVCastExpr *CX = cast(X); + const SCEVCastExpr *CY = cast(Y); + const SCEV *Xop = CX->getOperand(); + const SCEV *Yop = CY->getOperand(); + if (Xop->getType() == Yop->getType()) { + X = Xop; + Y = Yop; + } + } + } + if (SE->isKnownPredicate(Pred, X, Y)) + return true; + // If SE->isKnownPredicate can't prove the condition, + // we try the brute-force approach of subtracting + // and testing the difference. + // By testing with SE->isKnownPredicate first, we avoid + // the possibility of overflow when the arguments are constants. + const SCEV *Delta = SE->getMinusSCEV(X, Y); + switch (Pred) { + case CmpInst::ICMP_EQ: + return Delta->isZero(); + case CmpInst::ICMP_NE: + return SE->isKnownNonZero(Delta); + case CmpInst::ICMP_SGE: + return SE->isKnownNonNegative(Delta); + case CmpInst::ICMP_SLE: + return SE->isKnownNonPositive(Delta); + case CmpInst::ICMP_SGT: + return SE->isKnownPositive(Delta); + case CmpInst::ICMP_SLT: + return SE->isKnownNegative(Delta); + default: + llvm_unreachable("unexpected predicate in isKnownPredicate"); + } +} + + +// All subscripts are all the same type. +// Loop bound may be smaller (e.g., a char). +// Should zero extend loop bound, since it's always >= 0. +// This routine collects upper bound and extends if needed. +// Return null if no bound available. +const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, + Type *T) const { + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + const SCEV *UB = SE->getBackedgeTakenCount(L); + return SE->getNoopOrZeroExtend(UB, T); + } + return NULL; +} + + +// Calls collectUpperBound(), then attempts to cast it to SCEVConstant. +// If the cast fails, returns NULL. +const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, + Type *T + ) const { + if (const SCEV *UB = collectUpperBound(L, T)) + return dyn_cast(UB); + return NULL; +} + + +// testZIV - +// When we have a pair of subscripts of the form [c1] and [c2], +// where c1 and c2 are both loop invariant, we attack it using +// the ZIV test. Basically, we test by comparing the two values, +// but there are actually three possible results: +// 1) the values are equal, so there's a dependence +// 2) the values are different, so there's no dependence +// 3) the values might be equal, so we have to assume a dependence. +// +// Return true if dependence disproved. +bool DependenceAnalysis::testZIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const { + DEBUG(dbgs() << " src = " << *Src << "\n"); + DEBUG(dbgs() << " dst = " << *Dst << "\n"); + ++ZIVapplications; + if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { + DEBUG(dbgs() << " provably dependent\n"); + return false; // provably dependent + } + if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { + DEBUG(dbgs() << " provably independent\n"); + ++ZIVindependence; + return true; // provably independent + } + DEBUG(dbgs() << " possibly dependent\n"); + Result.Consistent = false; + return false; // possibly dependent +} + + +// strongSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.1 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the Strong SIV test. 
+// +// Can prove independence. Failing that, can compute distance (and direction). +// In the presence of symbolic terms, we can sometimes make progress. +// +// If there's a dependence, +// +// c1 + a*i = c2 + a*i' +// +// The dependence distance is +// +// d = i' - i = (c1 - c2)/a +// +// A dependence only exists if d is an integer and abs(d) <= U, where U is the +// loop's upper bound. If a dependence exists, the dependence direction is +// defined as +// +// { < if d > 0 +// direction = { = if d = 0 +// { > if d < 0 +// +// Return true if dependence disproved. +bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + DEBUG(dbgs() << "\tStrong SIV test\n"); + DEBUG(dbgs() << "\t Coeff = " << *Coeff); + DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); + DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst); + DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); + ++StrongSIVapplications; + assert(0 < Level && Level <= CommonLevels && "level out of range"); + Level--; + + const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); + DEBUG(dbgs() << "\t Delta = " << *Delta); + DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); + + // check that |Delta| < iteration count + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); + DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); + const SCEV *AbsDelta = + SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); + const SCEV *AbsCoeff = + SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); + const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); + if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { + // Distance greater than trip count - no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; + } + } + + // Can we compute distance? 
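+  // Worked example (illustrative): for subscripts 2*i + 4 (src) and
+  // 2*i (dst), Delta = 4 and Coeff = 2, so Distance = 2 with zero
+  // remainder and the direction refines to LT; with Delta = 3 instead,
+  // the remainder is nonzero and independence is proved.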
+ if (isa(Delta) && isa(Coeff)) { + APInt ConstDelta = cast(Delta)->getValue()->getValue(); + APInt ConstCoeff = cast(Coeff)->getValue()->getValue(); + APInt Distance = ConstDelta; // these need to be initialized + APInt Remainder = ConstDelta; + APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); + DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + // Make sure Coeff divides Delta exactly + if (Remainder != 0) { + // Coeff doesn't divide Distance, no dependence + ++StrongSIVindependence; + ++StrongSIVsuccesses; + return true; + } + Result.DV[Level].Distance = SE->getConstant(Distance); + NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); + if (Distance.sgt(0)) + Result.DV[Level].Direction &= Dependence::DVEntry::LT; + else if (Distance.slt(0)) + Result.DV[Level].Direction &= Dependence::DVEntry::GT; + else + Result.DV[Level].Direction &= Dependence::DVEntry::EQ; + ++StrongSIVsuccesses; + } + else if (Delta->isZero()) { + // since 0/X == 0 + Result.DV[Level].Distance = Delta; + NewConstraint.setDistance(Delta, CurLoop); + Result.DV[Level].Direction &= Dependence::DVEntry::EQ; + ++StrongSIVsuccesses; + } + else { + if (Coeff->isOne()) { + DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); + Result.DV[Level].Distance = Delta; // since X/1 == X + NewConstraint.setDistance(Delta, CurLoop); + } + else { + Result.Consistent = false; + NewConstraint.setLine(Coeff, + SE->getNegativeSCEV(Coeff), + SE->getNegativeSCEV(Delta), CurLoop); + } + + // maybe we can get a useful direction + bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); + bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); + bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); + bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); + bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); + // The double negatives above are confusing. + // It helps to read !SE->isKnownNonZero(Delta) + // as "Delta might be Zero" + unsigned NewDirection = Dependence::DVEntry::NONE; + if ((DeltaMaybePositive && CoeffMaybePositive) || + (DeltaMaybeNegative && CoeffMaybeNegative)) + NewDirection = Dependence::DVEntry::LT; + if (DeltaMaybeZero) + NewDirection |= Dependence::DVEntry::EQ; + if ((DeltaMaybeNegative && CoeffMaybePositive) || + (DeltaMaybePositive && CoeffMaybeNegative)) + NewDirection |= Dependence::DVEntry::GT; + if (NewDirection < Result.DV[Level].Direction) + ++StrongSIVsuccesses; + Result.DV[Level].Direction &= NewDirection; + } + return false; +} + + +// weakCrossingSIVtest - +// From the paper, Practical Dependence Testing, Section 4.2.2 +// +// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], +// where i is an induction variable, c1 and c2 are loop invariant, +// and a is a constant, we can solve it exactly using the +// Weak-Crossing SIV test. +// +// Given c1 + a*i = c2 - a*i', we can look for the intersection of +// the two lines, where i = i', yielding +// +// c1 + a*i = c2 - a*i +// 2a*i = c2 - c1 +// i = (c2 - c1)/2a +// +// If i < 0, there is no dependence. +// If i > upperbound, there is no dependence. +// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. +// If i = upperbound, there's a dependence with distance = 0. +// If i is integral, there's a dependence (all directions). +// If the non-integer part = 1/2, there's a dependence (<> directions). +// Otherwise, there's no dependence. +// +// Can prove independence. Failing that, +// can sometimes refine the directions. 
+// Can determine iteration for splitting. +// +// Return true if dependence disproved. +bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const { + DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); + DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++WeakCrossingSIVapplications; + assert(0 < Level && Level <= CommonLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); + if (Delta->isZero()) { + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); + ++WeakCrossingSIVsuccesses; + if (!Result.DV[Level].Direction) { + ++WeakCrossingSIVindependence; + return true; + } + Result.DV[Level].Distance = Delta; // = 0 + return false; + } + const SCEVConstant *ConstCoeff = dyn_cast(Coeff); + if (!ConstCoeff) + return false; + + Result.DV[Level].Splitable = true; + if (SE->isKnownNegative(ConstCoeff)) { + ConstCoeff = dyn_cast(SE->getNegativeSCEV(ConstCoeff)); + assert(ConstCoeff && + "dynamic cast of negative of ConstCoeff should yield constant"); + Delta = SE->getNegativeSCEV(Delta); + } + assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); + + // compute SplitIter for use by DependenceAnalysis::getSplitIteration() + SplitIter = + SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), + Delta), + SE->getMulExpr(SE->getConstant(Delta->getType(), 2), + ConstCoeff)); + DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); + + const SCEVConstant *ConstDelta = dyn_cast(Delta); + if (!ConstDelta) + return false; + + // We're certain that ConstCoeff > 0; therefore, + // if Delta < 0, then no dependence. + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); + if (SE->isKnownNegative(Delta)) { + // No dependence, Delta < 0 + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + + // We're certain that Delta > 0 and ConstCoeff > 0. 
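+  // Worked example (illustrative): for subscripts i (src) and 10 - i
+  // (dst), Coeff = 1 and Delta = 10, so the references cross at
+  // i = Delta/(2*Coeff) = 5.  A loop bound below 5 makes
+  // Delta > 2*Coeff*UB and proves independence; otherwise, because
+  // Distance = 10 is even, the = direction also survives the final
+  // divisibility check below.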
+ // Check Delta/(2*ConstCoeff) against upper loop bound + if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); + const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); + const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), + ConstantTwo); + DEBUG(dbgs() << "\t ML = " << *ML << "\n"); + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { + // Delta too big, no dependence + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { + // i = i' = UB + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); + ++WeakCrossingSIVsuccesses; + if (!Result.DV[Level].Direction) { + ++WeakCrossingSIVindependence; + return true; + } + Result.DV[Level].Splitable = false; + Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); + return false; + } + } + + // check that Coeff divides Delta + APInt APDelta = ConstDelta->getValue()->getValue(); + APInt APCoeff = ConstCoeff->getValue()->getValue(); + APInt Distance = APDelta; // these need to be initialized + APInt Remainder = APDelta; + APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + if (Remainder != 0) { + // Coeff doesn't divide Delta, no dependence + ++WeakCrossingSIVindependence; + ++WeakCrossingSIVsuccesses; + return true; + } + DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); + + // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible + APInt Two = APInt(Distance.getBitWidth(), 2, true); + Remainder = Distance.srem(Two); + DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); + if (Remainder != 0) { + // Equal direction isn't possible + Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); + ++WeakCrossingSIVsuccesses; + } + return false; +} + + +// Kirch's algorithm, from +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// MIT Press, 1989 +// +// Program 2.1, page 29. +// Computes the GCD of AM and BM. +// Also finds a solution to the equation ax - by = gcd(a, b). +// Returns true iff the gcd divides Delta. +static +bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, + APInt &G, APInt &X, APInt &Y) { + APInt A0(Bits, 1, true), A1(Bits, 0, true); + APInt B0(Bits, 0, true), B1(Bits, 1, true); + APInt G0 = AM.abs(); + APInt G1 = BM.abs(); + APInt Q = G0; // these need to be initialized + APInt R = G0; + APInt::sdivrem(G0, G1, Q, R); + while (R != 0) { + APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; + APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; + G0 = G1; G1 = R; + APInt::sdivrem(G0, G1, Q, R); + } + G = G1; + DEBUG(dbgs() << "\t GCD = " << G << "\n"); + X = AM.slt(0) ? -A1 : A1; + Y = BM.slt(0) ?
B1 : -B1; + + // make sure gcd divides Delta + R = Delta.srem(G); + if (R != 0) + return true; // gcd doesn't divide Delta, no dependence + Q = Delta.sdiv(G); + X *= Q; + Y *= Q; + return false; +} + + +static +APInt floorOfQuotient(APInt A, APInt B) { + APInt Q = A; // these need to be initialized + APInt R = A; + APInt::sdivrem(A, B, Q, R); + if (R == 0) + return Q; + if ((A.sgt(0) && B.sgt(0)) || + (A.slt(0) && B.slt(0))) + return Q; + else + return Q - 1; +} + + +static +APInt ceilingOfQuotient(APInt A, APInt B) { + APInt Q = A; // these need to be initialized + APInt R = A; + APInt::sdivrem(A, B, Q, R); + if (R == 0) + return Q; + if ((A.sgt(0) && B.sgt(0)) || + (A.slt(0) && B.slt(0))) + return Q + 1; + else + return Q; +} + + +static +APInt maxAPInt(APInt A, APInt B) { + return A.sgt(B) ? A : B; +} + + +static +APInt minAPInt(APInt A, APInt B) { + return A.slt(B) ? A : B; +} + + +// exactSIVtest - +// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], +// where i is an induction variable, c1 and c2 are loop invariant, and a1 +// and a2 are constant, we can solve it exactly using an algorithm developed +// by Banerjee and Wolfe. See Section 2.5.3 in +// +// Optimizing Supercompilers for Supercomputers +// Michael Wolfe +// MIT Press, 1989 +// +// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), +// so use them if possible. They're also a bit better with symbolics and, +// in the case of the strong SIV test, can compute Distances. +// +// Return true if dependence disproved. +bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { + DEBUG(dbgs() << "\tExact SIV test\n"); + DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); + DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); + DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); + DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); + ++ExactSIVapplications; + assert(0 < Level && Level <= CommonLevels && "Level out of range"); + Level--; + Result.Consistent = false; + const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); + DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); + NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), + Delta, CurLoop); + const SCEVConstant *ConstDelta = dyn_cast(Delta); + const SCEVConstant *ConstSrcCoeff = dyn_cast(SrcCoeff); + const SCEVConstant *ConstDstCoeff = dyn_cast(DstCoeff); + if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) + return false; + + // find gcd + APInt G, X, Y; + APInt AM = ConstSrcCoeff->getValue()->getValue(); + APInt BM = ConstDstCoeff->getValue()->getValue(); + unsigned Bits = AM.getBitWidth(); + if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + // gcd doesn't divide Delta, no dependence + ++ExactSIVindependence; + ++ExactSIVsuccesses; + return true; + } + + DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); + + // since SCEV construction normalizes, LM = 0 + APInt UM(Bits, 1, true); + bool UMvalid = false; + // UM is perhaps unavailable, let's check + if (const SCEVConstant *CUB = + collectConstantUpperBound(CurLoop, Delta->getType())) { + UM = CUB->getValue()->getValue(); + DEBUG(dbgs() << "\t UM = " << UM << "\n"); + UMvalid = true; + } + + APInt TU(APInt::getSignedMaxValue(Bits)); + APInt TL(APInt::getSignedMinValue(Bits)); + + // test(BM/G, LM-X) and test(-BM/G, X-UM) + APInt TMUL 
+// exactSIVtest -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
+// where i is an induction variable, c1 and c2 are loop invariant, and a1
+// and a2 are constant, we can solve it exactly using an algorithm developed
+// by Banerjee and Wolfe. See Section 2.5.3 in
+//
+//        Optimizing Supercompilers for Supercomputers
+//        Michael Wolfe
+//        MIT Press, 1989
+//
+// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc.),
+// so use them if possible. They're also a bit better with symbolics and,
+// in the case of the strong SIV test, can compute Distances.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
+                                      const SCEV *DstCoeff,
+                                      const SCEV *SrcConst,
+                                      const SCEV *DstConst,
+                                      const Loop *CurLoop,
+                                      unsigned Level,
+                                      FullDependence &Result,
+                                      Constraint &NewConstraint) const {
+  DEBUG(dbgs() << "\tExact SIV test\n");
+  DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+  DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+  DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+  ++ExactSIVapplications;
+  assert(0 < Level && Level <= CommonLevels && "Level out of range");
+  Level--;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+  DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+  NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
+                        Delta, CurLoop);
+  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+  if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+    return false;
+
+  // find gcd
+  APInt G, X, Y;
+  APInt AM = ConstSrcCoeff->getValue()->getValue();
+  APInt BM = ConstDstCoeff->getValue()->getValue();
+  unsigned Bits = AM.getBitWidth();
+  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+    // gcd doesn't divide Delta, no dependence
+    ++ExactSIVindependence;
+    ++ExactSIVsuccesses;
+    return true;
+  }
+
+  DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+  // since SCEV construction normalizes, LM = 0
+  APInt UM(Bits, 1, true);
+  bool UMvalid = false;
+  // UM is perhaps unavailable, let's check
+  if (const SCEVConstant *CUB =
+      collectConstantUpperBound(CurLoop, Delta->getType())) {
+    UM = CUB->getValue()->getValue();
+    DEBUG(dbgs() << "\t UM = " << UM << "\n");
+    UMvalid = true;
+  }
+
+  APInt TU(APInt::getSignedMaxValue(Bits));
+  APInt TL(APInt::getSignedMinValue(Bits));
+
+  // test(BM/G, LM-X) and test(-BM/G, X-UM)
+  APInt TMUL = BM.sdiv(G);
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+    DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    if (UMvalid) {
+      TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+      DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    }
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+    DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    if (UMvalid) {
+      TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+      DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    }
+  }
+
+  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+  TMUL = AM.sdiv(G);
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+    DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    if (UMvalid) {
+      TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+      DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    }
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+    DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    if (UMvalid) {
+      TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+      DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    }
+  }
+  if (TL.sgt(TU)) {
+    ++ExactSIVindependence;
+    ++ExactSIVsuccesses;
+    return true;
+  }
+
+  // explore directions
+  unsigned NewDirection = Dependence::DVEntry::NONE;
+
+  // less than
+  APInt SaveTU(TU); // save these
+  APInt SaveTL(TL);
+  DEBUG(dbgs() << "\t    exploring LT direction\n");
+  TMUL = AM - BM;
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+    DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+    DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+  }
+  if (TL.sle(TU)) {
+    NewDirection |= Dependence::DVEntry::LT;
+    ++ExactSIVsuccesses;
+  }
+
+  // equal
+  TU = SaveTU; // restore
+  TL = SaveTL;
+  DEBUG(dbgs() << "\t    exploring EQ direction\n");
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+    DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+    DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+  }
+  TMUL = BM - AM;
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+    DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+    DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+  }
+  if (TL.sle(TU)) {
+    NewDirection |= Dependence::DVEntry::EQ;
+    ++ExactSIVsuccesses;
+  }
+
+  // greater than
+  TU = SaveTU; // restore
+  TL = SaveTL;
+  DEBUG(dbgs() << "\t    exploring GT direction\n");
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+    DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+    DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+  }
+  if (TL.sle(TU)) {
+    NewDirection |= Dependence::DVEntry::GT;
+    ++ExactSIVsuccesses;
+  }
+
+  // finished
+  Result.DV[Level].Direction &= NewDirection;
+  if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
+    ++ExactSIVindependence;
+  return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
+}
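The heart of the test above is the linear Diophantine equation AM*x - BM*y = Delta: a solution exists exactly when gcd(AM, BM) divides Delta, and findGCD's X and Y come from the extended Euclidean algorithm. A self-contained sketch of that algorithm on plain integers (hypothetical names, not the patch's findGCD):

    // Illustrative only: extended Euclid, returning (g, x, y) with
    // a*x + b*y = g = gcd(a, b). Requires C++17 structured bindings.
    #include <tuple>
    static std::tuple<long, long, long> extGCD(long a, long b) {
      if (b == 0)
        return {a, 1, 0};
      auto [g, x, y] = extGCD(b, a % b);
      return {g, y, x - (a / b) * y};
    }
    // For subscripts [2*i] and [4*i' + 1], Delta = 1 and gcd(2, 4) = 2
    // does not divide 1, so the pair is independent, which mirrors the
    // early return right after findGCD above.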
+// Return true if the divisor evenly divides the dividend.
+static
+bool isRemainderZero(const SCEVConstant *Dividend,
+                     const SCEVConstant *Divisor) {
+  APInt ConstDividend = Dividend->getValue()->getValue();
+  APInt ConstDivisor = Divisor->getValue()->getValue();
+  return ConstDividend.srem(ConstDivisor) == 0;
+}
+
+
+// weakZeroSrcSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+//    c1 = c2 + a*i
+//
+// we get
+//
+//    (c1 - c2)/a = i
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroDstSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
+                                            const SCEV *SrcConst,
+                                            const SCEV *DstConst,
+                                            const Loop *CurLoop,
+                                            unsigned Level,
+                                            FullDependence &Result,
+                                            Constraint &NewConstraint) const {
+  // For the WeakSIV test, it's possible the loop isn't common to
+  // the Src and Dst loops. If it isn't, then there's no need to
+  // record a direction.
+  DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+  DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
+  DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+  ++WeakZeroSIVapplications;
+  assert(0 < Level && Level <= MaxLevels && "Level out of range");
+  Level--;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+  NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
+                        DstCoeff, Delta, CurLoop);
+  DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+  if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
+    if (Level < CommonLevels) {
+      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+      Result.DV[Level].PeelFirst = true;
+      ++WeakZeroSIVsuccesses;
+    }
+    return false; // dependences caused by first iteration
+  }
+  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+  if (!ConstCoeff)
+    return false;
+  const SCEV *AbsCoeff =
+    SE->isKnownNegative(ConstCoeff) ?
+    SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+  const SCEV *NewDelta =
+    SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+  // check that Delta/DstCoeff < iteration count
+  // really check NewDelta < count*AbsCoeff
+  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+    DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+    if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+      ++WeakZeroSIVindependence;
+      ++WeakZeroSIVsuccesses;
+      return true;
+    }
+    if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+      // dependences caused by last iteration
+      if (Level < CommonLevels) {
+        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+        Result.DV[Level].PeelLast = true;
+        ++WeakZeroSIVsuccesses;
+      }
+      return false;
+    }
+  }
+
+  // check that Delta/DstCoeff >= 0
+  // really check that NewDelta >= 0
+  if (SE->isKnownNegative(NewDelta)) {
+    // No dependence, newDelta < 0
+    ++WeakZeroSIVindependence;
+    ++WeakZeroSIVsuccesses;
+    return true;
+  }
+
+  // if DstCoeff doesn't divide Delta, then no dependence
+  if (isa<SCEVConstant>(Delta) &&
+      !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+    ++WeakZeroSIVindependence;
+    ++WeakZeroSIVsuccesses;
+    return true;
+  }
+  return false;
+}
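To make the peeling logic concrete: with src [c1] and dst [c2 + a*i] over 0 <= i <= UB, the test solves i = (c1 - c2)/a and classifies the result. A sketch of the same decision procedure on constants (all names illustrative; assumes a > 0 for brevity):

    // Illustrative only: weak-zero SIV decisions on plain integers.
    enum class WZ { Independent, PeelFirst, PeelLast, Unknown };
    WZ weakZero(long C1, long C2, long A, long UB) {
      long Delta = C1 - C2;
      if (Delta % A != 0) return WZ::Independent;  // i is not an integer
      long I = Delta / A;
      if (I < 0 || I > UB) return WZ::Independent; // i outside the loop
      if (I == 0)  return WZ::PeelFirst;  // only iteration 0 conflicts
      if (I == UB) return WZ::PeelLast;   // only the last iteration
      return WZ::Unknown;                 // direction stays *
    }
    // weakZero(10, 0, 2, 100) == WZ::Unknown     (i == 5 lies in the loop)
    // weakZero(10, 0, 4, 100) == WZ::Independent (4 does not divide 10)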
+// weakZeroDstSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+//    c1 + a*i = c2
+//
+// we get
+//
+//    i = (c2 - c1)/a
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroSrcSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
+                                            const SCEV *SrcConst,
+                                            const SCEV *DstConst,
+                                            const Loop *CurLoop,
+                                            unsigned Level,
+                                            FullDependence &Result,
+                                            Constraint &NewConstraint) const {
+  // For the WeakSIV test, it's possible the loop isn't common to the
+  // Src and Dst loops. If it isn't, then there's no need to record a direction.
+  DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
+  DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
+  DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+  ++WeakZeroSIVapplications;
+  assert(0 < Level && Level <= SrcLevels && "Level out of range");
+  Level--;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+  NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
+                        Delta, CurLoop);
+  DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+  if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
+    if (Level < CommonLevels) {
+      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+      Result.DV[Level].PeelFirst = true;
+      ++WeakZeroSIVsuccesses;
+    }
+    return false; // dependences caused by first iteration
+  }
+  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+  if (!ConstCoeff)
+    return false;
+  const SCEV *AbsCoeff =
+    SE->isKnownNegative(ConstCoeff) ?
+    SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+  const SCEV *NewDelta =
+    SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+  // check that Delta/SrcCoeff < iteration count
+  // really check NewDelta < count*AbsCoeff
+  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+    DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+    if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+      ++WeakZeroSIVindependence;
+      ++WeakZeroSIVsuccesses;
+      return true;
+    }
+    if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+      // dependences caused by last iteration
+      if (Level < CommonLevels) {
+        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+        Result.DV[Level].PeelLast = true;
+        ++WeakZeroSIVsuccesses;
+      }
+      return false;
+    }
+  }
+
+  // check that Delta/SrcCoeff >= 0
+  // really check that NewDelta >= 0
+  if (SE->isKnownNegative(NewDelta)) {
+    // No dependence, newDelta < 0
+    ++WeakZeroSIVindependence;
+    ++WeakZeroSIVsuccesses;
+    return true;
+  }
+
+  // if SrcCoeff doesn't divide Delta, then no dependence
+  if (isa<SCEVConstant>(Delta) &&
+      !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+    ++WeakZeroSIVindependence;
+    ++WeakZeroSIVsuccesses;
+    return true;
+  }
+  return false;
+}
+
+
+// exactRDIVtest - Tests the RDIV subscript pair for dependence.
+// Things of the form [c1 + a*i] and [c2 + b*j],
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a and b are constants.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
+                                       const SCEV *DstCoeff,
+                                       const SCEV *SrcConst,
+                                       const SCEV *DstConst,
+                                       const Loop *SrcLoop,
+                                       const Loop *DstLoop,
+                                       FullDependence &Result) const {
+  DEBUG(dbgs() << "\tExact RDIV test\n");
+  DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+  DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+  DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+  DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+  ++ExactRDIVapplications;
+  Result.Consistent = false;
+  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+  DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+  if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+    return false;
+
+  // find gcd
+  APInt G, X, Y;
+  APInt AM = ConstSrcCoeff->getValue()->getValue();
+  APInt BM = ConstDstCoeff->getValue()->getValue();
+  unsigned Bits = AM.getBitWidth();
+  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
+    // gcd doesn't divide Delta, no dependence
+    ++ExactRDIVindependence;
+    return true;
+  }
+
+  DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+  // since SCEV construction seems to normalize, LM = 0
+  APInt SrcUM(Bits, 1, true);
+  bool SrcUMvalid = false;
+  // SrcUM is perhaps unavailable, let's check
+  if (const SCEVConstant *UpperBound =
+      collectConstantUpperBound(SrcLoop, Delta->getType())) {
+    SrcUM = UpperBound->getValue()->getValue();
+    DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
+    SrcUMvalid = true;
+  }
+
+  APInt DstUM(Bits, 1, true);
+  bool DstUMvalid = false;
+  // DstUM is perhaps unavailable, let's check
+  if (const SCEVConstant *UpperBound =
+      collectConstantUpperBound(DstLoop, Delta->getType())) {
+    DstUM = UpperBound->getValue()->getValue();
+    DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
+    DstUMvalid = true;
+  }
+
+  APInt TU(APInt::getSignedMaxValue(Bits));
+  APInt TL(APInt::getSignedMinValue(Bits));
+
+  // test(BM/G, LM-X) and test(-BM/G, X-UM)
+  APInt TMUL = BM.sdiv(G);
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+    DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    if (SrcUMvalid) {
+      TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
+      DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    }
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+    DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    if (SrcUMvalid) {
+      TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
+      DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    }
+  }
+
+  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+  TMUL = AM.sdiv(G);
+  if (TMUL.sgt(0)) {
+    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+    DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    if (DstUMvalid) {
+      TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
+      DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    }
+  }
+  else {
+    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+    DEBUG(dbgs() << "\t TU = " << TU << "\n");
+    if (DstUMvalid) {
+      TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
+      DEBUG(dbgs() << "\t TL = " << TL << "\n");
+    }
+  }
+  if (TL.sgt(TU))
+    ++ExactRDIVindependence;
+  return TL.sgt(TU);
+}
+
+
+// symbolicRDIVtest -
+// In Section 4.5 of the Practical Dependence Testing paper, the authors
+// introduce a special case of Banerjee's Inequalities (also called the
+// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
+// particularly cases with symbolics. Since it's only able to disprove
+// dependence (not compute distances or directions), we'll use it as a
+// fallback for the other tests.
+//
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables and c1 and c2 are loop invariants,
+// we can use the symbolic tests to disprove some dependences, serving as a
+// backup for the RDIV test. Note that i and j can be the same variable,
+// letting this test serve as a backup for the various SIV tests.
+//
+// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
+// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
+// loop bounds for the i and j loops, respectively. So, ...
+//
+//       c1 + a1*i = c2 + a2*j
+//       a1*i - a2*j = c2 - c1
+//
+// To test for a dependence, we compute c2 - c1 and make sure it's in the
+// range of the maximum and minimum possible values of a1*i - a2*j.
+// Considering the signs of a1 and a2, we have 4 possible cases:
+//
+// 1) If a1 >= 0 and a2 >= 0, then
+//        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
+//              -a2*N2 <= c2 - c1 <= a1*N1
+//
+// 2) If a1 >= 0 and a2 <= 0, then
+//        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
+//                  0 <= c2 - c1 <= a1*N1 - a2*N2
+//
+// 3) If a1 <= 0 and a2 >= 0, then
+//        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
+//        a1*N1 - a2*N2 <= c2 - c1 <= 0
+//
+// 4) If a1 <= 0 and a2 <= 0, then
+//        a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
+//        a1*N1        <= c2 - c1 <= -a2*N2
+//
+// return true if dependence disproved
+bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
+                                          const SCEV *A2,
+                                          const SCEV *C1,
+                                          const SCEV *C2,
+                                          const Loop *Loop1,
+                                          const Loop *Loop2) const {
+  ++SymbolicRDIVapplications;
+  DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
+  DEBUG(dbgs() << "\t A1 = " << *A1);
+  DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
+  DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
+  DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
+  DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
+  const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
+  const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
+  DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
+  DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
+  const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
+  const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
+  DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
+  DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
+  if (SE->isKnownNonNegative(A1)) {
+    if (SE->isKnownNonNegative(A2)) {
+      // A1 >= 0 && A2 >= 0
+      if (N1) {
+        // make sure that c2 - c1 <= a1*N1
+        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+        DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+        if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
+          ++SymbolicRDIVindependence;
+          return true;
+        }
+      }
+      if (N2) {
+        // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
+        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+        DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+        if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
+          ++SymbolicRDIVindependence;
+          return true;
+        }
+      }
+    }
+    else if (SE->isKnownNonPositive(A2)) {
+      // a1 >= 0 && a2 <= 0
+      if (N1 && N2) {
+        // make sure that c2 - c1 <= a1*N1 - a2*N2
+        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+        DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+        if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
+          ++SymbolicRDIVindependence;
+          return true;
+        }
+      }
+      // make sure that 0 <= c2 - c1
+      if (SE->isKnownNegative(C2_C1)) {
+        ++SymbolicRDIVindependence;
+        return true;
+      }
+    }
+  }
+  else if (SE->isKnownNonPositive(A1)) {
+    if (SE->isKnownNonNegative(A2)) {
+      // a1 <= 0 && a2 >= 0
+      if (N1 && N2) {
+        // make sure that a1*N1 - a2*N2 <= c2 - c1
+        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+        DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+        if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
+          ++SymbolicRDIVindependence;
+          return true;
+        }
+      }
+      // make sure that c2 - c1 <= 0
+      if (SE->isKnownPositive(C2_C1)) {
+        ++SymbolicRDIVindependence;
+        return true;
+      }
+    }
+    else if (SE->isKnownNonPositive(A2)) {
+      // a1 <= 0 && a2 <= 0
+      if (N1) {
+        // make sure that a1*N1 <= c2 - c1
+        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+        DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+        if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
+          ++SymbolicRDIVindependence;
+          return true;
+        }
+      }
+      if (N2) {
+        // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
+        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+        DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+        if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
+          ++SymbolicRDIVindependence;
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+
+// testSIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
+// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
+// a2 are constant, we attack it with an SIV test. While they can all be
+// solved with the Exact SIV test, it's worthwhile to use simpler tests when
+// they apply; they're cheaper and sometimes more precise.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testSIV(const SCEV *Src,
+                                 const SCEV *Dst,
+                                 unsigned &Level,
+                                 FullDependence &Result,
+                                 Constraint &NewConstraint,
+                                 const SCEV *&SplitIter) const {
+  DEBUG(dbgs() << "    src = " << *Src << "\n");
+  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
+  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+  if (SrcAddRec && DstAddRec) {
+    const SCEV *SrcConst = SrcAddRec->getStart();
+    const SCEV *DstConst = DstAddRec->getStart();
+    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+    const Loop *CurLoop = SrcAddRec->getLoop();
+    assert(CurLoop == DstAddRec->getLoop() &&
+           "both loops in SIV should be same");
+    Level = mapSrcLoop(CurLoop);
+    bool disproven;
+    if (SrcCoeff == DstCoeff)
+      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+                                Level, Result, NewConstraint);
+    else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
+      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+                                      Level, Result, NewConstraint, SplitIter);
+    else
+      disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
+                               Level, Result, NewConstraint);
+    return disproven ||
+      gcdMIVtest(Src, Dst, Result) ||
+      symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+  }
+  if (SrcAddRec) {
+    const SCEV *SrcConst = SrcAddRec->getStart();
+    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+    const SCEV *DstConst = Dst;
+    const Loop *CurLoop = SrcAddRec->getLoop();
+    Level = mapSrcLoop(CurLoop);
+    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+                              Level, Result, NewConstraint) ||
+      gcdMIVtest(Src, Dst, Result);
+  }
+  if (DstAddRec) {
+    const SCEV *DstConst = DstAddRec->getStart();
+    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+    const SCEV *SrcConst = Src;
+    const Loop *CurLoop = DstAddRec->getLoop();
+    Level = mapDstLoop(CurLoop);
+    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
+                              CurLoop, Level, Result, NewConstraint) ||
+      gcdMIVtest(Src, Dst, Result);
+  }
+  llvm_unreachable("SIV test expected at least one AddRec");
+  return false;
+}
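A quick summary of the dispatch just above may help when reading the rest of the file; the pairings here are inferred from the branches, not stated elsewhere in the patch:

    // Illustrative only: which test the SIV dispatch selects.
    //   A[i + 1] vs A[i]       equal coefficients       -> strongSIVtest
    //   A[i]     vs A[10 - i]  negated coefficients     -> weakCrossingSIVtest
    //   A[2*i]   vs A[3*i]     distinct coefficients    -> exactSIVtest
    //   A[i]     vs A[5]       only src is an AddRec    -> weakZeroDstSIVtest
    //   A[5]     vs A[i]       only dst is an AddRec    -> weakZeroSrcSIVtest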
+// testRDIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
+// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
+// It doesn't make sense to talk about distance or direction in this case,
+// so there's no point in making special versions of the Strong SIV test or
+// the Weak-crossing SIV test.
+//
+// With minor algebra, this test can also be used for things like
+// [c1 + a1*i + a2*j][c2].
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testRDIV(const SCEV *Src,
+                                  const SCEV *Dst,
+                                  FullDependence &Result) const {
+  // we have 3 possible situations here:
+  //   1) [a*i + b] and [c*j + d]
+  //   2) [a*i + c*j + b] and [d]
+  //   3) [b] and [a*i + c*j + d]
+  // We need to find what we've got and get organized
+
+  const SCEV *SrcConst, *DstConst;
+  const SCEV *SrcCoeff, *DstCoeff;
+  const Loop *SrcLoop, *DstLoop;
+
+  DEBUG(dbgs() << "    src = " << *Src << "\n");
+  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
+  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+  if (SrcAddRec && DstAddRec) {
+    SrcConst = SrcAddRec->getStart();
+    SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+    SrcLoop = SrcAddRec->getLoop();
+    DstConst = DstAddRec->getStart();
+    DstCoeff = DstAddRec->getStepRecurrence(*SE);
+    DstLoop = DstAddRec->getLoop();
+  }
+  else if (SrcAddRec) {
+    if (const SCEVAddRecExpr *tmpAddRec =
+        dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
+      SrcConst = tmpAddRec->getStart();
+      SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
+      SrcLoop = tmpAddRec->getLoop();
+      DstConst = Dst;
+      DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
+      DstLoop = SrcAddRec->getLoop();
+    }
+    else
+      llvm_unreachable("RDIV reached by surprising SCEVs");
+  }
+  else if (DstAddRec) {
+    if (const SCEVAddRecExpr *tmpAddRec =
+        dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
+      DstConst = tmpAddRec->getStart();
+      DstCoeff = tmpAddRec->getStepRecurrence(*SE);
+      DstLoop = tmpAddRec->getLoop();
+      SrcConst = Src;
+      SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
+      SrcLoop = DstAddRec->getLoop();
+    }
+    else
+      llvm_unreachable("RDIV reached by surprising SCEVs");
+  }
+  else
+    llvm_unreachable("RDIV expected at least one AddRec");
+  return exactRDIVtest(SrcCoeff, DstCoeff,
+                       SrcConst, DstConst,
+                       SrcLoop, DstLoop,
+                       Result) ||
+    gcdMIVtest(Src, Dst, Result) ||
+    symbolicRDIVtest(SrcCoeff, DstCoeff,
+                     SrcConst, DstConst,
+                     SrcLoop, DstLoop);
+}
+
+
+// Tests the single-subscript MIV pair (Src and Dst) for dependence.
+// Return true if dependence disproved.
+// Can sometimes refine direction vectors.
+bool DependenceAnalysis::testMIV(const SCEV *Src,
+                                 const SCEV *Dst,
+                                 const SmallBitVector &Loops,
+                                 FullDependence &Result) const {
+  DEBUG(dbgs() << "    src = " << *Src << "\n");
+  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
+  Result.Consistent = false;
+  return gcdMIVtest(Src, Dst, Result) ||
+    banerjeeMIVtest(Src, Dst, Loops, Result);
+}
+
+
+// Given a product, e.g., 10*X*Y, returns the first constant operand,
+// in this case 10. If there is no constant part, returns NULL.
+static
+const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
+  for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
+    if (const SCEVConstant *Constant =
+        dyn_cast<SCEVConstant>(Product->getOperand(Op)))
+      return Constant;
+  }
+  return NULL;
+}
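The gcd test that follows rests on a classical fact: a1*i1 + ... + an*in = c has an integer solution iff gcd(a1, ..., an) divides c. A toy version over plain integers (illustrative helper, not the patch's code):

    // Illustrative only: the number-theoretic core of the gcd test.
    #include <cstdlib>
    #include <numeric>
    #include <vector>
    static bool gcdDisproves(const std::vector<long> &Coeffs, long Delta) {
      long G = 0;
      for (long C : Coeffs)
        G = std::gcd(G, std::labs(C));   // gcd of all coefficients
      return G != 0 && Delta % G != 0;   // no integer solution
    }
    // A[2*i + 4*j] vs A[2*i' + 4*j' + 1]: gcd(2, 4, 2, 4) = 2 does not
    // divide 1, so the accesses never overlap.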
+//===----------------------------------------------------------------------===//
+// gcdMIVtest -
+// Tests an MIV subscript pair for dependence.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Can sometimes disprove the equal direction for 1 or more loops,
+// as discussed in Michael Wolfe's book,
+// High Performance Compilers for Parallel Computing, page 235.
+//
+// We spend some effort (code!) to handle cases like
+// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
+// but M and N are just loop-invariant variables.
+// This should help us handle linearized subscripts;
+// also makes this test a useful backup to the various SIV tests.
+//
+// It occurs to me that the presence of loop-invariant variables
+// changes the nature of the test from "greatest common divisor"
+// to "a common divisor!"
+bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
+                                    const SCEV *Dst,
+                                    FullDependence &Result) const {
+  DEBUG(dbgs() << "starting gcd\n");
+  ++GCDapplications;
+  unsigned BitWidth = Src->getType()->getIntegerBitWidth();
+  APInt RunningGCD = APInt::getNullValue(BitWidth);
+
+  // Examine Src coefficients.
+  // Compute running GCD and record source constant.
+  // Because we're looking for the constant at the end of the chain,
+  // we can't quit the loop just because the GCD == 1.
+  const SCEV *Coefficients = Src;
+  while (const SCEVAddRecExpr *AddRec =
+         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+    const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+    if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+      // If the coefficient is the product of a constant and other stuff,
+      // we can use the constant in the GCD computation.
+      Constant = getConstantPart(Product);
+    if (!Constant)
+      return false;
+    APInt ConstCoeff = Constant->getValue()->getValue();
+    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+    Coefficients = AddRec->getStart();
+  }
+  const SCEV *SrcConst = Coefficients;
+
+  // Examine Dst coefficients.
+  // Compute running GCD and record destination constant.
+  // Because we're looking for the constant at the end of the chain,
+  // we can't quit the loop just because the GCD == 1.
+  Coefficients = Dst;
+  while (const SCEVAddRecExpr *AddRec =
+         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+    const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+    if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+      // If the coefficient is the product of a constant and other stuff,
+      // we can use the constant in the GCD computation.
+      Constant = getConstantPart(Product);
+    if (!Constant)
+      return false;
+    APInt ConstCoeff = Constant->getValue()->getValue();
+    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+    Coefficients = AddRec->getStart();
+  }
+  const SCEV *DstConst = Coefficients;
+
+  APInt ExtraGCD = APInt::getNullValue(BitWidth);
+  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+  DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
+  const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
+  if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
+    // If Delta is a sum of products, we may be able to make further progress.
+    for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
+      const SCEV *Operand = Sum->getOperand(Op);
+      if (isa<SCEVConstant>(Operand)) {
+        assert(!Constant && "Surprised to find multiple constants");
+        Constant = cast<SCEVConstant>(Operand);
+      }
+      else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
+        // Search for constant operand to participate in GCD;
+        // if none is found, return false.
+        const SCEVConstant *ConstOp = getConstantPart(Product);
+        if (!ConstOp)
+          return false;
+        APInt ConstOpValue = ConstOp->getValue()->getValue();
+        ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
+                                                   ConstOpValue.abs());
+      }
+      else
+        return false;
+    }
+  }
+  if (!Constant)
+    return false;
+  APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
+  DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
+  if (ConstDelta == 0)
+    return false;
+  RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
+  DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
+  APInt Remainder = ConstDelta.srem(RunningGCD);
+  if (Remainder != 0) {
+    ++GCDindependence;
+    return true;
+  }
+
+  // Try to disprove equal directions.
+  // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
+  // the code above can't disprove the dependence because the GCD = 1.
+  // So we consider what happens if i = i' and what happens if j = j'.
+  // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
+  // which is infeasible, so we can disallow the = direction for the i level.
+  // Setting j = j' doesn't help matters, so we end up with a direction vector
+  // of [<>, *]
+  //
+  // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
+  // we need to remember that the constant part is 5 and the RunningGCD should
+  // be initialized to ExtraGCD = 30.
+  DEBUG(dbgs() << "    ExtraGCD = " << ExtraGCD << '\n');
+
+  bool Improved = false;
+  Coefficients = Src;
+  while (const SCEVAddRecExpr *AddRec =
+         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+    Coefficients = AddRec->getStart();
+    const Loop *CurLoop = AddRec->getLoop();
+    RunningGCD = ExtraGCD;
+    const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
+    const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
+    const SCEV *Inner = Src;
+    while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+      AddRec = cast<SCEVAddRecExpr>(Inner);
+      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+      if (CurLoop == AddRec->getLoop())
+        ; // SrcCoeff == Coeff
+      else {
+        if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+          // If the coefficient is the product of a constant and other stuff,
+          // we can use the constant in the GCD computation.
+          Constant = getConstantPart(Product);
+        else
+          Constant = cast<SCEVConstant>(Coeff);
+        APInt ConstCoeff = Constant->getValue()->getValue();
+        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+      }
+      Inner = AddRec->getStart();
+    }
+    Inner = Dst;
+    while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+      AddRec = cast<SCEVAddRecExpr>(Inner);
+      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+      if (CurLoop == AddRec->getLoop())
+        DstCoeff = Coeff;
+      else {
+        if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+          // If the coefficient is the product of a constant and other stuff,
+          // we can use the constant in the GCD computation.
+          Constant = getConstantPart(Product);
+        else
+          Constant = cast<SCEVConstant>(Coeff);
+        APInt ConstCoeff = Constant->getValue()->getValue();
+        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+      }
+      Inner = AddRec->getStart();
+    }
+    Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
+    if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
+      // If the coefficient is the product of a constant and other stuff,
+      // we can use the constant in the GCD computation.
+      Constant = getConstantPart(Product);
+    else if (isa<SCEVConstant>(Delta))
+      Constant = cast<SCEVConstant>(Delta);
+    else {
+      // The difference of the two coefficients might not be a product
+      // or constant, in which case we give up on this direction.
+      continue;
+    }
+    APInt ConstCoeff = Constant->getValue()->getValue();
+    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+    DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
+    if (RunningGCD != 0) {
+      Remainder = ConstDelta.srem(RunningGCD);
+      DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
+      if (Remainder != 0) {
+        unsigned Level = mapSrcLoop(CurLoop);
+        Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
+        Improved = true;
+      }
+    }
+  }
+  if (Improved)
+    ++GCDsuccesses;
+  DEBUG(dbgs() << "all done\n");
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// banerjeeMIVtest -
+// Use Banerjee's Inequalities to test an MIV subscript pair.
+// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
+// Generally follows the discussion in Section 2.5.2 of
+//
+//    Optimizing Supercompilers for Supercomputers
+//    Michael Wolfe
+//
+// The inequalities given on page 25 are simplified in that loops are
+// normalized so that the lower bound is always 0 and the stride is always 1.
+// For example, Wolfe gives
+//
+//     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// where A_k is the coefficient of the kth index in the source subscript,
+// B_k is the coefficient of the kth index in the destination subscript,
+// U_k is the upper bound of the kth index, L_k is the lower bound of the kth
+// index, and N_k is the stride of the kth index. Since all loops are normalized
+// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
+// equation to
+//
+//     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
+//            = (A^-_k - B_k)^- (U_k - 1) - B_k
+//
+// Similar simplifications are possible for the other equations.
+//
+// When we can't determine the number of iterations for a loop,
+// we use NULL as an indicator for the worst case, infinity.
+// When computing the upper bound, NULL denotes +inf;
+// for the lower bound, NULL denotes -inf.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
+                                         const SCEV *Dst,
+                                         const SmallBitVector &Loops,
+                                         FullDependence &Result) const {
+  DEBUG(dbgs() << "starting Banerjee\n");
+  ++BanerjeeApplications;
+  DEBUG(dbgs() << "    Src = " << *Src << '\n');
+  const SCEV *A0;
+  CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
+  DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
+  const SCEV *B0;
+  CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
+  BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
+  const SCEV *Delta = SE->getMinusSCEV(B0, A0);
+  DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
+
+  // Compute bounds for all the * directions.
+  DEBUG(dbgs() << "\tBounds[*]\n");
+  for (unsigned K = 1; K <= MaxLevels; ++K) {
+    Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
+    Bound[K].Direction = Dependence::DVEntry::ALL;
+    Bound[K].DirSet = Dependence::DVEntry::NONE;
+    findBoundsALL(A, B, Bound, K);
+#ifndef NDEBUG
+    DEBUG(dbgs() << "\t " << K << '\t');
+    if (Bound[K].Lower[Dependence::DVEntry::ALL])
+      DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
+    else
+      DEBUG(dbgs() << "-inf\t");
+    if (Bound[K].Upper[Dependence::DVEntry::ALL])
+      DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
+    else
+      DEBUG(dbgs() << "+inf\n");
+#endif
+  }
+
+  // Test the *, *, *, ... case.
+  bool Disproved = false;
+  if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
+    // Explore the direction vector hierarchy.
+    unsigned DepthExpanded = 0;
+    unsigned NewDeps = exploreDirections(1, A, B, Bound,
+                                         Loops, DepthExpanded, Delta);
+    if (NewDeps > 0) {
+      bool Improved = false;
+      for (unsigned K = 1; K <= CommonLevels; ++K) {
+        if (Loops[K]) {
+          unsigned Old = Result.DV[K - 1].Direction;
+          Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
+          Improved |= Old != Result.DV[K - 1].Direction;
+          if (!Result.DV[K - 1].Direction) {
+            Improved = false;
+            Disproved = true;
+            break;
+          }
+        }
+      }
+      if (Improved)
+        ++BanerjeeSuccesses;
+    }
+    else {
+      ++BanerjeeIndependence;
+      Disproved = true;
+    }
+  }
+  else {
+    ++BanerjeeIndependence;
+    Disproved = true;
+  }
+  delete [] Bound;
+  delete [] A;
+  delete [] B;
+  return Disproved;
+}
+
+
+// Hierarchically expands the direction vector
+// search space, combining the directions of discovered dependences
+// in the DirSet field of Bound. Returns the number of distinct
+// dependences discovered. If the dependence is disproved,
+// it will return 0.
+unsigned DependenceAnalysis::exploreDirections(unsigned Level,
+                                               CoefficientInfo *A,
+                                               CoefficientInfo *B,
+                                               BoundInfo *Bound,
+                                               const SmallBitVector &Loops,
+                                               unsigned &DepthExpanded,
+                                               const SCEV *Delta) const {
+  if (Level > CommonLevels) {
+    // record result
+    DEBUG(dbgs() << "\t[");
+    for (unsigned K = 1; K <= CommonLevels; ++K) {
+      if (Loops[K]) {
+        Bound[K].DirSet |= Bound[K].Direction;
+#ifndef NDEBUG
+        switch (Bound[K].Direction) {
+        case Dependence::DVEntry::LT:
+          DEBUG(dbgs() << " <");
+          break;
+        case Dependence::DVEntry::EQ:
+          DEBUG(dbgs() << " =");
+          break;
+        case Dependence::DVEntry::GT:
+          DEBUG(dbgs() << " >");
+          break;
+        case Dependence::DVEntry::ALL:
+          DEBUG(dbgs() << " *");
+          break;
+        default:
+          llvm_unreachable("unexpected Bound[K].Direction");
+        }
+#endif
+      }
+    }
+    DEBUG(dbgs() << " ]\n");
+    return 1;
+  }
+  if (Loops[Level]) {
+    if (Level > DepthExpanded) {
+      DepthExpanded = Level;
+      // compute bounds for <, =, > at current level
+      findBoundsLT(A, B, Bound, Level);
+      findBoundsGT(A, B, Bound, Level);
+      findBoundsEQ(A, B, Bound, Level);
+#ifndef NDEBUG
+      DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
+      DEBUG(dbgs() << "\t    <\t");
+      if (Bound[Level].Lower[Dependence::DVEntry::LT])
+        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
+      else
+        DEBUG(dbgs() << "-inf\t");
+      if (Bound[Level].Upper[Dependence::DVEntry::LT])
+        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
+      else
+        DEBUG(dbgs() << "+inf\n");
+      DEBUG(dbgs() << "\t    =\t");
+      if (Bound[Level].Lower[Dependence::DVEntry::EQ])
+        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
+      else
+        DEBUG(dbgs() << "-inf\t");
+      if (Bound[Level].Upper[Dependence::DVEntry::EQ])
+        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
+      else
+        DEBUG(dbgs() << "+inf\n");
+      DEBUG(dbgs() << "\t    >\t");
+      if (Bound[Level].Lower[Dependence::DVEntry::GT])
+        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
+      else
+        DEBUG(dbgs() << "-inf\t");
+      if (Bound[Level].Upper[Dependence::DVEntry::GT])
+        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
+      else
+        DEBUG(dbgs() << "+inf\n");
+#endif
+    }
+
+    unsigned NewDeps = 0;
+
+    // test bounds for <, *, *, ...
+    if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
+      NewDeps += exploreDirections(Level + 1, A, B, Bound,
+                                   Loops, DepthExpanded, Delta);
+
+    // test bounds for =, *, *, ...
+    if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
+      NewDeps += exploreDirections(Level + 1, A, B, Bound,
+                                   Loops, DepthExpanded, Delta);
+
+    // test bounds for >, *, *, ...
+    if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
+      NewDeps += exploreDirections(Level + 1, A, B, Bound,
+                                   Loops, DepthExpanded, Delta);
+
+    Bound[Level].Direction = Dependence::DVEntry::ALL;
+    return NewDeps;
+  }
+  else
+    return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
+}
+
+
+// Returns true iff the current bounds are plausible.
+bool DependenceAnalysis::testBounds(unsigned char DirKind,
+                                    unsigned Level,
+                                    BoundInfo *Bound,
+                                    const SCEV *Delta) const {
+  Bound[Level].Direction = DirKind;
+  if (const SCEV *LowerBound = getLowerBound(Bound))
+    if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
+      return false;
+  if (const SCEV *UpperBound = getUpperBound(Bound))
+    if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
+      return false;
+  return true;
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the * direction. Records them in Bound.
+// Wolfe gives the equations
+//
+//    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
+//    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+//    LB^*_k = (A^-_k - B^+_k)U_k
+//    UB^*_k = (A^+_k - B^-_k)U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
+                                       CoefficientInfo *B,
+                                       BoundInfo *Bound,
+                                       unsigned K) const {
+  Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
+  if (Bound[K].Iterations) {
+    Bound[K].Lower[Dependence::DVEntry::ALL] =
+      SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
+                     Bound[K].Iterations);
+    Bound[K].Upper[Dependence::DVEntry::ALL] =
+      SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
+                     Bound[K].Iterations);
+  }
+  else {
+    // If the difference is 0, we won't need to know the number of iterations.
+    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
+      Bound[K].Lower[Dependence::DVEntry::ALL] =
+        SE->getConstant(A[K].Coeff->getType(), 0);
+    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
+      Bound[K].Upper[Dependence::DVEntry::ALL] =
+        SE->getConstant(A[K].Coeff->getType(), 0);
+  }
+}
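A worked instance of the simplified * equations, with made-up numbers:

    // Illustrative only: * bounds for A_k = 2, B_k = -3, U_k = 10.
    //   A^-_k = 0, B^+_k = 0, A^+_k = 2, B^-_k = -3
    //   LB^*_k = (0 - 0)    * 10 = 0
    //   UB^*_k = (2 - (-3)) * 10 = 50
    // so a dependence can exist at level k only if 0 <= Delta_k <= 50.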
+// Computes the upper and lower bounds for level K
+// using the = direction. Records them in Bound.
+// Wolfe gives the equations
+//
+//    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
+//    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+//    LB^=_k = (A_k - B_k)^- U_k
+//    UB^=_k = (A_k - B_k)^+ U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
+                                      CoefficientInfo *B,
+                                      BoundInfo *Bound,
+                                      unsigned K) const {
+  Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
+  if (Bound[K].Iterations) {
+    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+    const SCEV *NegativePart = getNegativePart(Delta);
+    Bound[K].Lower[Dependence::DVEntry::EQ] =
+      SE->getMulExpr(NegativePart, Bound[K].Iterations);
+    const SCEV *PositivePart = getPositivePart(Delta);
+    Bound[K].Upper[Dependence::DVEntry::EQ] =
+      SE->getMulExpr(PositivePart, Bound[K].Iterations);
+  }
+  else {
+    // If the positive/negative part of the difference is 0,
+    // we won't need to know the number of iterations.
+    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+    const SCEV *NegativePart = getNegativePart(Delta);
+    if (NegativePart->isZero())
+      Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
+    const SCEV *PositivePart = getPositivePart(Delta);
+    if (PositivePart->isZero())
+      Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
+  }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the < direction. Records them in Bound.
+// Wolfe gives the equations
+//
+//    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+//    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
+//    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
+                                      CoefficientInfo *B,
+                                      BoundInfo *Bound,
+                                      unsigned K) const {
+  Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
+  if (Bound[K].Iterations) {
+    const SCEV *Iter_1 =
+      SE->getMinusSCEV(Bound[K].Iterations,
+                       SE->getConstant(Bound[K].Iterations->getType(), 1));
+    const SCEV *NegPart =
+      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+    Bound[K].Lower[Dependence::DVEntry::LT] =
+      SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
+    const SCEV *PosPart =
+      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+    Bound[K].Upper[Dependence::DVEntry::LT] =
+      SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
+  }
+  else {
+    // If the positive/negative part of the difference is 0,
+    // we won't need to know the number of iterations.
+    const SCEV *NegPart =
+      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+    if (NegPart->isZero())
+      Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+    const SCEV *PosPart =
+      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+    if (PosPart->isZero())
+      Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+  }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the > direction. Records them in Bound.
+// Wolfe gives the equations
+//
+//    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+//    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+//    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
+//    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
+                                      CoefficientInfo *B,
+                                      BoundInfo *Bound,
+                                      unsigned K) const {
+  Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
+  if (Bound[K].Iterations) {
+    const SCEV *Iter_1 =
+      SE->getMinusSCEV(Bound[K].Iterations,
+                       SE->getConstant(Bound[K].Iterations->getType(), 1));
+    const SCEV *NegPart =
+      getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+    Bound[K].Lower[Dependence::DVEntry::GT] =
+      SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
+    const SCEV *PosPart =
+      getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+    Bound[K].Upper[Dependence::DVEntry::GT] =
+      SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
+  }
+  else {
+    // If the positive/negative part of the difference is 0,
+    // we won't need to know the number of iterations.
+    const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+    if (NegPart->isZero())
+      Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
+    const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+    if (PosPart->isZero())
+      Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
+  }
+}
+
+
+// X^+ = max(X, 0)
+const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
+  return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// X^- = min(X, 0)
+const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
+  return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
+}
+
+
+// Walks through the subscript,
+// collecting each coefficient, the associated loop bounds,
+// and recording its positive and negative parts for later use.
+DependenceAnalysis::CoefficientInfo *
+DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
+                                     bool SrcFlag,
+                                     const SCEV *&Constant) const {
+  const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
+  CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
+  for (unsigned K = 1; K <= MaxLevels; ++K) {
+    CI[K].Coeff = Zero;
+    CI[K].PosPart = Zero;
+    CI[K].NegPart = Zero;
+    CI[K].Iterations = NULL;
+  }
+  while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
+    const Loop *L = AddRec->getLoop();
+    unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
+    CI[K].Coeff = AddRec->getStepRecurrence(*SE);
+    CI[K].PosPart = getPositivePart(CI[K].Coeff);
+    CI[K].NegPart = getNegativePart(CI[K].Coeff);
+    CI[K].Iterations = collectUpperBound(L, Subscript->getType());
+    Subscript = AddRec->getStart();
+  }
+  Constant = Subscript;
+#ifndef NDEBUG
+  DEBUG(dbgs() << "\tCoefficient Info\n");
+  for (unsigned K = 1; K <= MaxLevels; ++K) {
+    DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
+    DEBUG(dbgs() << "\tPos Part = ");
+    DEBUG(dbgs() << *CI[K].PosPart);
+    DEBUG(dbgs() << "\tNeg Part = ");
+    DEBUG(dbgs() << *CI[K].NegPart);
+    DEBUG(dbgs() << "\tUpper Bound = ");
+    if (CI[K].Iterations)
+      DEBUG(dbgs() << *CI[K].Iterations);
+    else
+      DEBUG(dbgs() << "+inf");
+    DEBUG(dbgs() << '\n');
+  }
+  DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
+#endif
+  return CI;
+}
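For orientation, here is what the walk above would record for a two-level subscript; the numbers are invented:

    // Illustrative only: for A[2*i - 3*j + 5], with i at level 1 and
    // j at level 2, collectCoeffInfo would produce
    //   CI[1] = { Coeff =  2, PosPart = 2, NegPart =  0, Iterations = UB_i }
    //   CI[2] = { Coeff = -3, PosPart = 0, NegPart = -3, Iterations = UB_j }
    // and hand back Constant = 5 through the reference parameter.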
+// Looks through all the bounds info and
+// computes the lower bound given the current direction settings
+// at each level. If the lower bound for any level is -inf,
+// the result is -inf.
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
+  const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
+  for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+    if (Bound[K].Lower[Bound[K].Direction])
+      Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
+    else
+      Sum = NULL;
+  }
+  return Sum;
+}
+
+
+// Looks through all the bounds info and
+// computes the upper bound given the current direction settings
+// at each level. If the upper bound at any level is +inf,
+// the result is +inf.
+const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
+  const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
+  for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+    if (Bound[K].Upper[Bound[K].Direction])
+      Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
+    else
+      Sum = NULL;
+  }
+  return Sum;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constraint manipulation for Delta test.
+
+// Given a linear SCEV,
+// return the coefficient (the step)
+// corresponding to the specified loop.
+// If there isn't one, return 0.
+// For example, given a*i + b*j + c*k, finding the coefficient
+// corresponding to the j loop would yield b.
+const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
+                                                const Loop *TargetLoop) const {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AddRec)
+    return SE->getConstant(Expr->getType(), 0);
+  if (AddRec->getLoop() == TargetLoop)
+    return AddRec->getStepRecurrence(*SE);
+  return findCoefficient(AddRec->getStart(), TargetLoop);
+}
+
+
+// Given a linear SCEV,
+// return the SCEV given by zeroing out the coefficient
+// corresponding to the specified loop.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield a*i + c*k.
+const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
+                                                const Loop *TargetLoop) const {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AddRec)
+    return Expr; // ignore
+  if (AddRec->getLoop() == TargetLoop)
+    return AddRec->getStart();
+  return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
+                           AddRec->getStepRecurrence(*SE),
+                           AddRec->getLoop(),
+                           AddRec->getNoWrapFlags());
+}
+
+
+// Given a linear SCEV Expr,
+// return the SCEV given by adding some Value to the
+// coefficient corresponding to the specified TargetLoop.
+// For example, given a*i + b*j + c*k, adding 1 to the coefficient
+// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
+const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
+                                                 const Loop *TargetLoop,
+                                                 const SCEV *Value) const {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AddRec) // create a new addRec
+    return SE->getAddRecExpr(Expr,
+                             Value,
+                             TargetLoop,
+                             SCEV::FlagAnyWrap); // Worst case, with no info.
+  if (AddRec->getLoop() == TargetLoop) {
+    const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
+    if (Sum->isZero())
+      return AddRec->getStart();
+    return SE->getAddRecExpr(AddRec->getStart(),
+                             Sum,
+                             AddRec->getLoop(),
+                             AddRec->getNoWrapFlags());
+  }
+  return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
+                                            TargetLoop, Value),
+                           AddRec->getStepRecurrence(*SE),
+                           AddRec->getLoop(),
+                           AddRec->getNoWrapFlags());
+}
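Taken together, the three helpers behave like this on a hypothetical linear subscript (the same example the comments above use):

    // Illustrative only: effect on E = 2*i + 3*j + 4*k.
    //   findCoefficient(E, j loop)       == 3
    //   zeroCoefficient(E, j loop)       == 2*i + 4*k
    //   addToCoefficient(E, j loop, 5)   == 2*i + 8*j + 4*k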
+// Review the constraints, looking for opportunities
+// to simplify a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+// Corresponds to Figure 5 from the paper
+//
+//            Practical Dependence Testing
+//            Goff, Kennedy, Tseng
+//            PLDI 1991
+bool DependenceAnalysis::propagate(const SCEV *&Src,
+                                   const SCEV *&Dst,
+                                   SmallBitVector &Loops,
+                                   SmallVector<Constraint, 4> &Constraints,
+                                   bool &Consistent) {
+  bool Result = false;
+  for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+    DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
+    DEBUG(Constraints[LI].dump(dbgs()));
+    if (Constraints[LI].isDistance())
+      Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
+    else if (Constraints[LI].isLine())
+      Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
+    else if (Constraints[LI].isPoint())
+      Result |= propagatePoint(Src, Dst, Constraints[LI]);
+  }
+  return Result;
+}
+
+
+// Attempt to propagate a distance
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
+                                           const SCEV *&Dst,
+                                           Constraint &CurConstraint,
+                                           bool &Consistent) {
+  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+  DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+  const SCEV *A_K = findCoefficient(Src, CurLoop);
+  if (A_K->isZero())
+    return false;
+  const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
+  Src = SE->getMinusSCEV(Src, DA_K);
+  Src = zeroCoefficient(Src, CurLoop);
+  DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+  DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+  Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+  DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+  if (!findCoefficient(Dst, CurLoop)->isZero())
+    Consistent = false;
+  return true;
+}
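A small worked run of the distance propagation, with invented numbers:

    // Illustrative only: propagating distance d = 1 on the i loop into
    //   Src = 2*i + 3*j,  Dst = 2*i' + j'    (A_K = 2, so d*A_K = 2)
    // yields
    //   Src -> zeroCoefficient(2*i + 3*j - 2, i) = 3*j - 2
    //   Dst -> addToCoefficient(Dst, i, -2)      = j'
    // Dst's i coefficient is now zero, so Consistent stays true.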
+// Attempt to propagate a line
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+bool DependenceAnalysis::propagateLine(const SCEV *&Src,
+                                       const SCEV *&Dst,
+                                       Constraint &CurConstraint,
+                                       bool &Consistent) {
+  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+  const SCEV *A = CurConstraint.getA();
+  const SCEV *B = CurConstraint.getB();
+  const SCEV *C = CurConstraint.getC();
+  DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
+  DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
+  DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
+  if (A->isZero()) {
+    const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
+    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+    if (!Bconst || !Cconst) return false;
+    APInt Beta = Bconst->getValue()->getValue();
+    APInt Charlie = Cconst->getValue()->getValue();
+    APInt CdivB = Charlie.sdiv(Beta);
+    assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
+    const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+    //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+    Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+    Dst = zeroCoefficient(Dst, CurLoop);
+    if (!findCoefficient(Src, CurLoop)->isZero())
+      Consistent = false;
+  }
+  else if (B->isZero()) {
+    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+    if (!Aconst || !Cconst) return false;
+    APInt Alpha = Aconst->getValue()->getValue();
+    APInt Charlie = Cconst->getValue()->getValue();
+    APInt CdivA = Charlie.sdiv(Alpha);
+    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+    Src = zeroCoefficient(Src, CurLoop);
+    if (!findCoefficient(Dst, CurLoop)->isZero())
+      Consistent = false;
+  }
+  else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
+    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+    if (!Aconst || !Cconst) return false;
+    APInt Alpha = Aconst->getValue()->getValue();
+    APInt Charlie = Cconst->getValue()->getValue();
+    APInt CdivA = Charlie.sdiv(Alpha);
+    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+    Src = zeroCoefficient(Src, CurLoop);
+    Dst = addToCoefficient(Dst, CurLoop, A_K);
+    if (!findCoefficient(Dst, CurLoop)->isZero())
+      Consistent = false;
+  }
+  else {
+    // paper is incorrect here, or perhaps just misleading
+    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    Src = SE->getMulExpr(Src, A);
+    Dst = SE->getMulExpr(Dst, A);
+    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
+    Src = zeroCoefficient(Src, CurLoop);
+    Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
+    if (!findCoefficient(Dst, CurLoop)->isZero())
+      Consistent = false;
+  }
+  DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
+  DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
+  return true;
+}
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint) { + const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *AP_K = findCoefficient(Dst, CurLoop); + const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); + const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); + DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); + Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); + Src = zeroCoefficient(Src, CurLoop); + DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); + DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); + Dst = zeroCoefficient(Dst, CurLoop); + DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); + return true; +} + + +// Update direction vector entry based on the current constraint. +void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, + const Constraint &CurConstraint + ) const { + DEBUG(dbgs() << "\tUpdate direction, constraint ="); + DEBUG(CurConstraint.dump(dbgs())); + if (CurConstraint.isAny()) + ; // use defaults + else if (CurConstraint.isDistance()) { + // this one is consistent, the others aren't + Level.Scalar = false; + Level.Distance = CurConstraint.getD(); + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!SE->isKnownNonZero(Level.Distance)) // if may be zero + NewDirection = Dependence::DVEntry::EQ; + if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive + NewDirection |= Dependence::DVEntry::LT; + if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else if (CurConstraint.isLine()) { + Level.Scalar = false; + Level.Distance = NULL; + // direction should be accurate + } + else if (CurConstraint.isPoint()) { + Level.Scalar = false; + Level.Distance = NULL; + unsigned NewDirection = Dependence::DVEntry::NONE; + if (!isKnownPredicate(CmpInst::ICMP_NE, + CurConstraint.getY(), + CurConstraint.getX())) + // if X may be = Y + NewDirection |= Dependence::DVEntry::EQ; + if (!isKnownPredicate(CmpInst::ICMP_SLE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be > X + NewDirection |= Dependence::DVEntry::LT; + if (!isKnownPredicate(CmpInst::ICMP_SGE, + CurConstraint.getY(), + CurConstraint.getX())) + // if Y may be < X + NewDirection |= Dependence::DVEntry::GT; + Level.Direction &= NewDirection; + } + else + llvm_unreachable("constraint has unexpected kind"); +} + + +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +// For debugging purposes, dump a small bit vector to dbgs(). +static void dumpSmallBitVector(SmallBitVector &BV) { + dbgs() << "{"; + for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { + dbgs() << VI; + if (BV.find_next(VI) >= 0) + dbgs() << ' '; + } + dbgs() << "}\n"; +} +#endif + + +// depends - +// Returns NULL if there is no dependence. +// Otherwise, return a Dependence with as many details as possible. +// Corresponds to Section 3.1 in the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +// +// Care is required to keep the code below up to date w.r.t. this routine. 
+Dependence *DependenceAnalysis::depends(const Instruction *Src,
+                                        const Instruction *Dst,
+                                        bool PossiblyLoopIndependent) {
+  if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
+      (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
+    // if either instruction doesn't reference memory, there's no dependence
+    return NULL;
+
+  if (!isLoadOrStore(Src) || !isLoadOrStore(Dst))
+    // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+    return new Dependence(Src, Dst);
+
+  const Value *SrcPtr = getPointerOperand(Src);
+  const Value *DstPtr = getPointerOperand(Dst);
+
+  switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+  case AliasAnalysis::MayAlias:
+  case AliasAnalysis::PartialAlias:
+    // cannot analyze objects if we don't understand their aliasing.
+    return new Dependence(Src, Dst);
+  case AliasAnalysis::NoAlias:
+    // If the objects noalias, they are distinct, accesses are independent.
+    return NULL;
+  case AliasAnalysis::MustAlias:
+    break; // The underlying objects alias; test accesses for dependence.
+  }
+
+  const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+  const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+  if (!SrcGEP || !DstGEP)
+    return new Dependence(Src, Dst); // missing GEP, assume dependence
+
+  if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType())
+    return new Dependence(Src, Dst); // different types, assume dependence
+
+  // establish loop nesting levels
+  establishNestingLevels(Src, Dst);
+  DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+  DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+
+  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+  ++TotalArrayPairs;
+
+  // classify subscript pairs
+  unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+  SmallVector<Subscript, 4> Pair(Pairs);
+  for (unsigned SI = 0; SI < Pairs; ++SI) {
+    Pair[SI].Loops.resize(MaxLevels + 1);
+    Pair[SI].GroupLoops.resize(MaxLevels + 1);
+    Pair[SI].Group.resize(Pairs);
+  }
+  Pairs = 0;
+  for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+         SrcEnd = SrcGEP->idx_end(),
+         DstIdx = DstGEP->idx_begin(),
+         DstEnd = DstGEP->idx_end();
+       SrcIdx != SrcEnd && DstIdx != DstEnd;
+       ++SrcIdx, ++DstIdx, ++Pairs) {
+    Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
+    Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
+    removeMatchingExtensions(&Pair[Pairs]);
+    Pair[Pairs].Classification =
+      classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
+                   Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
+                   Pair[Pairs].Loops);
+    Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
+    Pair[Pairs].Group.set(Pairs);
+    DEBUG(dbgs() << " subscript " << Pairs << "\n");
+    DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n");
+    DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n");
+    DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n");
+    DEBUG(dbgs() << "\tloops = ");
+    DEBUG(dumpSmallBitVector(Pair[Pairs].Loops));
+  }
+
+  SmallBitVector Separable(Pairs);
+  SmallBitVector Coupled(Pairs);
+
+  // Partition subscripts into separable and minimally-coupled groups
+  // The algorithm in the paper has better asymptotic complexity;
+  // this may be faster in practice. Check someday.
+  //
+  // Here's an example of how it works. Consider this code:
+  //
+  //   for (i = ...) {
+  //     for (j = ...) {
+  //       for (k = ...) {
+  //         for (l = ...) {
+  //           for (m = ...) {
+  //             A[i][j][k][m] = ...;
+  //             ... = A[0][j][l][i + j];
+  //           }
+  //         }
+  //       }
+  //     }
+  //   }
+  //
+  // There are 4 subscripts here:
+  //   0 [i] and [0]
+  //   1 [j] and [j]
+  //   2 [k] and [l]
+  //   3 [m] and [i + j]
+  //
+  // We've already classified each subscript pair as ZIV, SIV, etc.,
+  // and collected all the loops mentioned by pair P in Pair[P].Loops.
+  // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
+  // and set Pair[P].Group = {P}.
+  //
+  //      Src Dst       Classification Loops   GroupLoops Group
+  //    0 [i] [0]       SIV            {1}     {1}        {0}
+  //    1 [j] [j]       SIV            {2}     {2}        {1}
+  //    2 [k] [l]       RDIV           {3,4}   {3,4}      {2}
+  //    3 [m] [i + j]   MIV            {1,2,5} {1,2,5}    {3}
+  //
+  // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
+  // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
+  //
+  // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
+  // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
+  // Next 0 and 3. The intersection of their GroupLoops = {1}, not empty,
+  // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
+  // to either Separable or Coupled).
+  //
+  // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
+  // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
+  // so Pair[3].Group = {0, 1, 3} and Done = false.
+  //
+  // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
+  // Since Done remains true, we add 2 to the set of Separable pairs.
+  //
+  // Finally, we consider 3. There's nothing to compare it with,
+  // so Done remains true and we add it to the Coupled set.
+  // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
+  //
+  // In the end, we've got 1 separable subscript and 1 coupled group.
+  for (unsigned SI = 0; SI < Pairs; ++SI) {
+    if (Pair[SI].Classification == Subscript::NonLinear) {
+      // ignore these, but collect loops for later
+      ++NonlinearSubscriptPairs;
+      collectCommonLoops(Pair[SI].Src,
+                         LI->getLoopFor(Src->getParent()),
+                         Pair[SI].Loops);
+      collectCommonLoops(Pair[SI].Dst,
+                         LI->getLoopFor(Dst->getParent()),
+                         Pair[SI].Loops);
+      Result.Consistent = false;
+    }
+    else if (Pair[SI].Classification == Subscript::ZIV) {
+      // always separable
+      Separable.set(SI);
+    }
+    else {
+      // SIV, RDIV, or MIV, so check for coupled group
+      bool Done = true;
+      for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+        SmallBitVector Intersection = Pair[SI].GroupLoops;
+        Intersection &= Pair[SJ].GroupLoops;
+        if (Intersection.any()) {
+          // accumulate set of all the loops in group
+          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+          // accumulate set of all subscripts in group
+          Pair[SJ].Group |= Pair[SI].Group;
+          Done = false;
+        }
+      }
+      if (Done) {
+        if (Pair[SI].Group.count() == 1) {
+          Separable.set(SI);
+          ++SeparableSubscriptPairs;
+        }
+        else {
+          Coupled.set(SI);
+          ++CoupledSubscriptPairs;
+        }
+      }
+    }
+  }
+
+  DEBUG(dbgs() << " Separable = ");
+  DEBUG(dumpSmallBitVector(Separable));
+  DEBUG(dbgs() << " Coupled = ");
+  DEBUG(dumpSmallBitVector(Coupled));
+
+  Constraint NewConstraint;
+  NewConstraint.setAny(SE);
+
+  // test separable subscripts
+  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+    DEBUG(dbgs() << "testing subscript " << SI);
+    switch (Pair[SI].Classification) {
+    case Subscript::ZIV:
+      DEBUG(dbgs() << ", ZIV\n");
+      if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
+        return NULL;
+      break;
+    case Subscript::SIV: {
+      DEBUG(dbgs() << ", SIV\n");
+      unsigned Level;
+      const SCEV *SplitIter = NULL;
+      if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+                  Result, NewConstraint, SplitIter))
+        return NULL;
+      break;
+    }
+    case Subscript::RDIV:
+      DEBUG(dbgs() << ", RDIV\n");
+      if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
+        return NULL;
+      break;
+    case Subscript::MIV:
+      DEBUG(dbgs() << ", MIV\n");
+      if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
+        return NULL;
+      break;
+    default:
+      llvm_unreachable("subscript has unexpected classification");
+    }
+  }
+
+  if (Coupled.count()) {
+    // test coupled subscript groups
+    DEBUG(dbgs() << "starting on coupled subscripts\n");
+    DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
+    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+    for (unsigned II = 0; II <= MaxLevels; ++II)
+      Constraints[II].setAny(SE);
+    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+      DEBUG(dbgs() << "testing subscript group " << SI << " { ");
+      SmallBitVector Group(Pair[SI].Group);
+      SmallBitVector Sivs(Pairs);
+      SmallBitVector Mivs(Pairs);
+      SmallBitVector ConstrainedLevels(MaxLevels + 1);
+      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+        DEBUG(dbgs() << SJ << " ");
+        if (Pair[SJ].Classification == Subscript::SIV)
+          Sivs.set(SJ);
+        else
+          Mivs.set(SJ);
+      }
+      DEBUG(dbgs() << "}\n");
+      while (Sivs.any()) {
+        bool Changed = false;
+        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+          DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
+          // SJ is an SIV subscript that's part of the current coupled group
+          unsigned Level;
+          const SCEV *SplitIter = NULL;
+          DEBUG(dbgs() << "SIV\n");
+          if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+                      Result, NewConstraint, SplitIter))
+            return NULL;
+          ConstrainedLevels.set(Level);
+          if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
+            if (Constraints[Level].isEmpty()) {
+              ++DeltaIndependence;
+              return NULL;
+            }
+            Changed = true;
+          }
+          Sivs.reset(SJ);
+        }
+        if (Changed) {
+          // propagate, possibly creating new SIVs and ZIVs
+          DEBUG(dbgs() << " propagating\n");
+          DEBUG(dbgs() << "\tMivs = ");
+          DEBUG(dumpSmallBitVector(Mivs));
+          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+            // SJ is an MIV subscript that's part of the current coupled group
+            DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
+            if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
+                          Constraints, Result.Consistent)) {
+              DEBUG(dbgs() << "\t Changed\n");
+              ++DeltaPropagations;
+              Pair[SJ].Classification =
+                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+                             Pair[SJ].Loops);
+              switch (Pair[SJ].Classification) {
+              case Subscript::ZIV:
+                DEBUG(dbgs() << "ZIV\n");
+                if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+                  return NULL;
+                Mivs.reset(SJ);
+                break;
+              case Subscript::SIV:
+                Sivs.set(SJ);
+                Mivs.reset(SJ);
+                break;
+              case Subscript::RDIV:
+              case Subscript::MIV:
+                break;
+              default:
+                llvm_unreachable("bad subscript classification");
+              }
+            }
+          }
+        }
+      }
+
+      // test & propagate remaining RDIVs
+      for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+        if (Pair[SJ].Classification == Subscript::RDIV) {
+          DEBUG(dbgs() << "RDIV test\n");
+          if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+            return NULL;
+          // I don't yet understand how to propagate RDIV results
+          Mivs.reset(SJ);
+        }
+      }
+
+      // test remaining MIVs
+      // This code is temporary.
+      // Better to somehow test all remaining subscripts simultaneously.
+      for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+        if (Pair[SJ].Classification == Subscript::MIV) {
+          DEBUG(dbgs() << "MIV test\n");
+          if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
+            return NULL;
+        }
+        else
+          llvm_unreachable("expected only MIV subscripts at this point");
+      }
+
+      // update Result.DV from constraint vector
+      DEBUG(dbgs() << " updating\n");
+      for (int SJ = ConstrainedLevels.find_first();
+           SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+        updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
+        if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
+          return NULL;
+      }
+    }
+  }
+
+  // make sure Scalar flags are set correctly
+  SmallBitVector CompleteLoops(MaxLevels + 1);
+  for (unsigned SI = 0; SI < Pairs; ++SI)
+    CompleteLoops |= Pair[SI].Loops;
+  for (unsigned II = 1; II <= CommonLevels; ++II)
+    if (CompleteLoops[II])
+      Result.DV[II - 1].Scalar = false;
+
+  // make sure the loopIndependent flag is set correctly
+  if (PossiblyLoopIndependent) {
+    for (unsigned II = 1; II <= CommonLevels; ++II) {
+      if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
+        Result.LoopIndependent = false;
+        break;
+      }
+    }
+  }
+
+  FullDependence *Final = new FullDependence(Result);
+  Result.DV = NULL;
+  return Final;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// getSplitIteration -
+// Rather than spend rarely-used space recording the splitting iteration
+// during the Weak-Crossing SIV test, we re-compute it on demand.
+// The re-computation is basically a repeat of the entire dependence test,
+// though simplified since we know that the dependence exists.
+// It's tedious, since we must go through all propagations, etc.
+//
+// Care is required to keep this code up to date w.r.t. the code above.
+//
+// Generally, the dependence analyzer will be used to build
+// a dependence graph for a function (basically a map from instructions
+// to dependences). Looking for cycles in the graph shows us loops
+// that cannot be trivially vectorized/parallelized.
+//
+// We can try to improve the situation by examining all the dependences
+// that make up the cycle, looking for ones we can break.
+// Sometimes, peeling the first or last iteration of a loop will break
+// dependences, and we've got flags for those possibilities.
+// Sometimes, splitting a loop at some other iteration will do the trick,
+// and we've got a flag for that case. Rather than waste the space to
+// record the exact iteration (since we rarely know), we provide
+// a method that calculates the iteration. It's a drag that it must work
+// from scratch, but wonderful in that it's possible.
+//
+// Here's an example:
+//
+//    for (i = 0; i < 10; i++)
+//        A[i] = ...
+//        ... = A[11 - i]
+//
+// There's a loop-carried flow dependence from the store to the load,
+// found by the weak-crossing SIV test. The dependence will have a flag,
+// indicating that the dependence can be broken by splitting the loop.
+// Calling getSplitIteration will return 5.
+// Splitting the loop, like so:
+//
+//    for (i = 0; i <= 5; i++)
+//        A[i] = ...
+//        ... = A[11 - i]
+//    for (i = 6; i < 10; i++)
+//        A[i] = ...
+//        ... = A[11 - i]
+//
+// breaks the dependence and allows us to vectorize/parallelize
+// both loops.
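The split-at-5 claim above can be checked mechanically. A minimal standalone program (plain C++, not part of this patch) that brute-forces the store/load iteration pairs and confirms that every loop-carried dependence is separated by splitting after iteration 5:

#include <cassert>

int main() {
  // Store at iteration i writes A[i]; load at iteration j reads A[11 - j].
  // A loop-carried flow dependence needs i < j with i == 11 - j.
  for (int i = 0; i < 10; ++i) {
    for (int j = i + 1; j < 10; ++j) {
      if (i == 11 - j) {
        // Every dependent pair has i <= 5 and j >= 6 (here: (2,9), (3,8),
        // (4,7), (5,6)), so splitting after iteration 5 separates them all.
        assert(i <= 5 && j >= 6);
      }
    }
  }
  return 0;
}
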
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
+                                                  unsigned SplitLevel) {
+  assert(Dep && "expected a pointer to a Dependence");
+  assert(Dep->isSplitable(SplitLevel) &&
+         "Dep should be splitable at SplitLevel");
+  const Instruction *Src = Dep->getSrc();
+  const Instruction *Dst = Dep->getDst();
+  assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
+  assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
+  assert(isLoadOrStore(Src));
+  assert(isLoadOrStore(Dst));
+  const Value *SrcPtr = getPointerOperand(Src);
+  const Value *DstPtr = getPointerOperand(Dst);
+  assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
+         AliasAnalysis::MustAlias);
+  const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+  const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+  assert(SrcGEP);
+  assert(DstGEP);
+  assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType());
+
+  // establish loop nesting levels
+  establishNestingLevels(Src, Dst);
+
+  FullDependence Result(Src, Dst, false, CommonLevels);
+
+  // classify subscript pairs
+  unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+  SmallVector<Subscript, 4> Pair(Pairs);
+  for (unsigned SI = 0; SI < Pairs; ++SI) {
+    Pair[SI].Loops.resize(MaxLevels + 1);
+    Pair[SI].GroupLoops.resize(MaxLevels + 1);
+    Pair[SI].Group.resize(Pairs);
+  }
+  Pairs = 0;
+  for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+         SrcEnd = SrcGEP->idx_end(),
+         DstIdx = DstGEP->idx_begin(),
+         DstEnd = DstGEP->idx_end();
+       SrcIdx != SrcEnd && DstIdx != DstEnd;
+       ++SrcIdx, ++DstIdx, ++Pairs) {
+    Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
+    Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
+    Pair[Pairs].Classification =
+      classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
+                   Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
+                   Pair[Pairs].Loops);
+    Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
+    Pair[Pairs].Group.set(Pairs);
+  }
+
+  SmallBitVector Separable(Pairs);
+  SmallBitVector Coupled(Pairs);
+
+  // partition subscripts into separable and minimally-coupled groups
+  for (unsigned SI = 0; SI < Pairs; ++SI) {
+    if (Pair[SI].Classification == Subscript::NonLinear) {
+      // ignore these, but collect loops for later
+      collectCommonLoops(Pair[SI].Src,
+                         LI->getLoopFor(Src->getParent()),
+                         Pair[SI].Loops);
+      collectCommonLoops(Pair[SI].Dst,
+                         LI->getLoopFor(Dst->getParent()),
+                         Pair[SI].Loops);
+      Result.Consistent = false;
+    }
+    else if (Pair[SI].Classification == Subscript::ZIV)
+      Separable.set(SI);
+    else {
+      // SIV, RDIV, or MIV, so check for coupled group
+      bool Done = true;
+      for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+        SmallBitVector Intersection = Pair[SI].GroupLoops;
+        Intersection &= Pair[SJ].GroupLoops;
+        if (Intersection.any()) {
+          // accumulate set of all the loops in group
+          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+          // accumulate set of all subscripts in group
+          Pair[SJ].Group |= Pair[SI].Group;
+          Done = false;
+        }
+      }
+      if (Done) {
+        if (Pair[SI].Group.count() == 1)
+          Separable.set(SI);
+        else
+          Coupled.set(SI);
+      }
+    }
+  }
+
+  Constraint NewConstraint;
+  NewConstraint.setAny(SE);
+
+  // test separable subscripts
+  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+    switch (Pair[SI].Classification) {
+    case Subscript::SIV: {
+      unsigned Level;
+      const SCEV *SplitIter = NULL;
+      (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+                     Result, NewConstraint, SplitIter);
+      if (Level == SplitLevel) {
+        assert(SplitIter != NULL);
+        return SplitIter;
+      }
+      break;
+    }
+    case Subscript::ZIV:
+    case Subscript::RDIV:
+    case Subscript::MIV:
+      break;
+    default:
+      llvm_unreachable("subscript has unexpected classification");
+    }
+  }
+
+  if (Coupled.count()) {
+    // test coupled subscript groups
+    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+    for (unsigned II = 0; II <= MaxLevels; ++II)
+      Constraints[II].setAny(SE);
+    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+      SmallBitVector Group(Pair[SI].Group);
+      SmallBitVector Sivs(Pairs);
+      SmallBitVector Mivs(Pairs);
+      SmallBitVector ConstrainedLevels(MaxLevels + 1);
+      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+        if (Pair[SJ].Classification == Subscript::SIV)
+          Sivs.set(SJ);
+        else
+          Mivs.set(SJ);
+      }
+      while (Sivs.any()) {
+        bool Changed = false;
+        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+          // SJ is an SIV subscript that's part of the current coupled group
+          unsigned Level;
+          const SCEV *SplitIter = NULL;
+          (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+                         Result, NewConstraint, SplitIter);
+          if (Level == SplitLevel && SplitIter)
+            return SplitIter;
+          ConstrainedLevels.set(Level);
+          if (intersectConstraints(&Constraints[Level], &NewConstraint))
+            Changed = true;
+          Sivs.reset(SJ);
+        }
+        if (Changed) {
+          // propagate, possibly creating new SIVs and ZIVs
+          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+            // SJ is an MIV subscript that's part of the current coupled group
+            if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
+                          Pair[SJ].Loops, Constraints, Result.Consistent)) {
+              Pair[SJ].Classification =
+                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+                             Pair[SJ].Loops);
+              switch (Pair[SJ].Classification) {
+              case Subscript::ZIV:
+                Mivs.reset(SJ);
+                break;
+              case Subscript::SIV:
+                Sivs.set(SJ);
+                Mivs.reset(SJ);
+                break;
+              case Subscript::RDIV:
+              case Subscript::MIV:
+                break;
+              default:
+                llvm_unreachable("bad subscript classification");
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  llvm_unreachable("somehow reached end of routine");
+  return NULL;
+}
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 1604576..3e537e9 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -133,7 +133,9 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
   }
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void DominanceFrontierBase::dump() const {
   print(dbgs());
 }
+#endif
 
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 0df3e8a..dec0ece 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -141,12 +141,13 @@ private:
     for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
          II != IE; ++II) {
       CallSite CS(cast<Value>(II));
-      if (CS && !isa<IntrinsicInst>(II)) {
+      if (CS) {
         const Function *Callee = CS.getCalledFunction();
-        if (Callee)
-          Node->addCalledFunction(CS, getOrInsertFunction(Callee));
-        else
+        if (!Callee)
+          // Indirect calls of intrinsics are not allowed so no need to check.
+          Node->addCalledFunction(CS, CallsExternalNode);
+        else if (!Callee->isIntrinsic())
+          Node->addCalledFunction(CS, getOrInsertFunction(Callee));
       }
     }
   }
@@ -198,9 +199,11 @@ void CallGraph::print(raw_ostream &OS, Module*) const {
   for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
     I->second->print(OS);
 }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void CallGraph::dump() const {
   print(dbgs(), 0);
 }
+#endif
 
 //===----------------------------------------------------------------------===//
 // Implementations of public modification methods
@@ -267,7 +270,9 @@ void CallGraphNode::print(raw_ostream &OS) const {
   OS << '\n';
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void CallGraphNode::dump() const { print(dbgs()); }
+#endif
 
 /// removeCallEdgeFor - This method removes the edge in the node for the
 /// specified call site. Note that this method takes linear time, so it
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 22f6e96..990caa8 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -263,7 +263,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
     } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
         return true;
-    } else if (isFreeCall(U)) {
+    } else if (isFreeCall(U, TLI)) {
       Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
     } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
       // Make sure that this is just the function being called, not that it is
@@ -329,7 +329,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
 
       // Check the value being stored.
       Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
-      if (!isAllocLikeFn(Ptr))
+      if (!isAllocLikeFn(Ptr, TLI))
         return false; // Too hard to analyze.
 
       // Analyze all uses of the allocation. If any of them are used in a
@@ -458,7 +458,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           if (SI->isVolatile())
             // Treat volatile stores as reading memory somewhere.
             FunctionEffect |= Ref;
-        } else if (isAllocationFn(&*II) || isFreeCall(&*II)) {
+        } else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) {
           FunctionEffect |= ModRef;
         } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
           // The callgraph doesn't include intrinsic calls.
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 0a6682a..d4221b8 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -22,7 +22,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
@@ -235,7 +235,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   LI = &getAnalysis<LoopInfo>();
   DT = &getAnalysis<DominatorTree>();
   SE = &getAnalysis<ScalarEvolution>();
-  TD = getAnalysisIfAvailable<TargetData>();
+  TD = getAnalysisIfAvailable<DataLayout>();
 
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.
Start by finding all of the PHI nodes in the header for @@ -273,9 +273,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void IVUsers::dump() const { print(dbgs()); } +#endif void IVUsers::releaseMemory() { Processed.clear(); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index bc1ecd2..5f51f77 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -24,7 +24,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/GlobalAlias.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -41,8 +41,8 @@ class CallAnalyzer : public InstVisitor { typedef InstVisitor Base; friend class InstVisitor; - // TargetData if available, or null. - const TargetData *const TD; + // DataLayout if available, or null. + const DataLayout *const TD; // The called function. Function &F; @@ -51,9 +51,12 @@ class CallAnalyzer : public InstVisitor { int Cost; const bool AlwaysInline; - bool IsRecursive; + bool IsCallerRecursive; + bool IsRecursiveCall; bool ExposesReturnsTwice; bool HasDynamicAlloca; + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; unsigned NumInstructions, NumVectorInstructions; int FiftyPercentVectorBonus, TenPercentVectorBonus; int VectorBonus; @@ -123,10 +126,11 @@ class CallAnalyzer : public InstVisitor { bool visitCallSite(CallSite CS); public: - CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) + CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), - IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), + AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), @@ -138,6 +142,7 @@ public: int getThreshold() { return Threshold; } int getCost() { return Cost; } + bool isAlwaysInline() { return AlwaysInline; } // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. @@ -269,6 +274,13 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // FIXME: Check whether inlining will turn a dynamic alloca into a static // alloca, and handle that case. + // Accumulate the allocated size. + if (I.isStaticAlloca()) { + Type *Ty = I.getAllocatedType(); + AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + Ty->getPrimitiveSizeInBits()); + } + // We will happily inline static alloca instructions or dynamic alloca // instructions in always-inline situations. if (AlwaysInline || I.isStaticAlloca()) @@ -602,7 +614,7 @@ bool CallAnalyzer::visitStore(StoreInst &I) { bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && - !F.hasFnAttr(Attribute::ReturnsTwice)) { + !F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice)) { // This aborts the entire analysis. 
ExposesReturnsTwice = true; return false; @@ -625,7 +637,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { if (F == CS.getInstruction()->getParent()->getParent()) { // This flag will fully abort the analysis, so don't bother with anything // else. - IsRecursive = true; + IsRecursiveCall = true; return false; } @@ -712,7 +724,14 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { Cost += InlineConstants::InstrCost; // If the visit this instruction detected an uninlinable pattern, abort. - if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) return false; if (NumVectorInstructions > NumInstructions/2) @@ -814,7 +833,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // one load and one store per word copied. // FIXME: The maxStoresPerMemcpy setting from the target should be used // here instead of a magic number of 8, but it's not available via - // TargetData. + // DataLayout. NumStores = std::min(NumStores, 8U); Cost -= 2 * NumStores * InlineConstants::InstrCost; @@ -831,12 +850,14 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Cost += InlineConstants::LastCallToStaticBonus; // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero cost. - if (InvokeInst *II = dyn_cast(CS.getInstruction())) { + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this unless there is literally zero + // cost. + Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast(Instr)) { if (isa(II->getNormalDest()->begin())) Threshold = 1; - } else if (isa(++BasicBlock::iterator(CS.getInstruction()))) + } else if (isa(++BasicBlock::iterator(Instr))) Threshold = 1; // If this function uses the coldcc calling convention, prefer not to inline @@ -852,6 +873,20 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (F.empty()) return true; + Function *Caller = CS.getInstruction()->getParent()->getParent(); + // Check if the caller function is recursive itself. + for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); + U != E; ++U) { + CallSite Site(cast(*U)); + if (!Site) + continue; + Instruction *I = Site.getInstruction(); + if (I->getParent()->getParent() == Caller) { + IsCallerRecursive = true; + break; + } + } + // Track whether we've seen a return instruction. The first return // instruction is free, as at least one will usually disappear in inlining. bool HasReturn = false; @@ -908,9 +943,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this indirect - // jump would jump from the inlined copy of the function into the original - // function which is extremely undefined behavior. 
+  // for example) would be referring to the original function, and this
+  // indirect jump would jump from the inlined copy of the function into the
+  // original function which is extremely undefined behavior.
   // FIXME: This logic isn't really right; we can safely inline functions
   // with indirectbr's as long as no other function or global references the
   // blockaddress of a block within the current function. And as a QOI issue,
@@ -928,8 +963,16 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
     // Analyze the cost of this block. If we blow through the threshold, this
     // returns false, and we can bail out.
     if (!analyzeBlock(BB)) {
-      if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
         return false;
+
+      // If the caller is a recursive function then we don't want to inline
+      // functions which allocate a lot of stack space because it would increase
+      // the caller stack usage dramatically.
+      if (IsCallerRecursive &&
+          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+        return false;
+
       break;
     }
@@ -955,7 +998,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
 
     // If we're unable to select a particular successor, just count all of
     // them.
-    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+         ++TIdx)
       BBWorklist.insert(TI->getSuccessor(TIdx));
 
     // If we had any successors at this point, then post-inlining is likely to
@@ -974,6 +1018,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   return AlwaysInline || Cost < Threshold;
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// \brief Dump stats about this call's analysis.
 void CallAnalyzer::dump() {
 #define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
@@ -987,6 +1032,7 @@ void CallAnalyzer::dump() {
   DEBUG_PRINT_STAT(SROACostSavingsLost);
 #undef DEBUG_PRINT_STAT
 }
+#endif
 
 InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
   return getInlineCost(CS, CS.getCalledFunction(), Threshold);
 }
@@ -998,10 +1044,12 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
   // something else. Don't inline functions marked noinline or call sites
   // marked noinline.
   if (!Callee || Callee->mayBeOverridden() ||
-      Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
+      Callee->getFnAttributes().hasAttribute(Attributes::NoInline) ||
+      CS.isNoInline())
     return llvm::InlineCost::getNever();
 
-  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n");
+  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
+        << "...\n");
 
   CallAnalyzer CA(TD, *Callee, Threshold);
   bool ShouldInline = CA.analyzeCall(CS);
@@ -1011,7 +1059,8 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
 
   // Check if there was a reason to force inlining or no inlining.
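A compressed sketch of the stack-growth guard added in this hunk (plain C++, not part of the patch; the cap value and helper name are invented for illustration, while the real constant lives in InlineConstants::TotalAllocaSizeRecursiveCaller):

#include <cstdint>
#include <vector>

// Hypothetical stand-in for InlineConstants::TotalAllocaSizeRecursiveCaller.
static const uint64_t TotalAllocaSizeCap = 1024;

// Refuse to inline a callee into a recursive caller once the callee's
// accumulated static alloca size exceeds the cap, since every recursive
// frame of the caller would grow by that amount after inlining.
bool allocasFitRecursiveCaller(bool IsCallerRecursive,
                               const std::vector<uint64_t> &StaticAllocaSizes) {
  uint64_t AllocatedSize = 0;
  for (size_t I = 0, E = StaticAllocaSizes.size(); I != E; ++I) {
    AllocatedSize += StaticAllocaSizes[I];
    if (IsCallerRecursive && AllocatedSize > TotalAllocaSizeCap)
      return false;
  }
  return true;
}
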
if (!ShouldInline && CA.getCost() < CA.getThreshold()) return InlineCost::getNever(); - if (ShouldInline && CA.getCost() >= CA.getThreshold()) + if (ShouldInline && (CA.isAlwaysInline() || + CA.getCost() >= CA.getThreshold())) return InlineCost::getAlways(); return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 379a35a..a76e5ad 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -31,7 +31,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -42,11 +42,11 @@ STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc, "Number of reassociations"); struct Query { - const TargetData *TD; + const DataLayout *TD; const TargetLibraryInfo *TLI; const DominatorTree *DT; - Query(const TargetData *td, const TargetLibraryInfo *tli, + Query(const DataLayout *td, const TargetLibraryInfo *tli, const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {} }; @@ -651,7 +651,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), RecursionLimit); @@ -664,7 +664,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// if the GEP has all-constant indices. Returns false if any non-constant /// index is encountered leaving the 'Offset' in an undefined state. The /// 'Offset' APInt must be the bitwidth of the target's pointer size. -static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP, +static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, APInt &Offset) { unsigned IntPtrWidth = TD.getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); @@ -696,7 +696,7 @@ static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP, /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. -static Constant *stripAndComputeConstantOffsets(const TargetData &TD, +static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Value *&V) { if (!V->getType()->isPointerTy()) return 0; @@ -731,7 +731,7 @@ static Constant *stripAndComputeConstantOffsets(const TargetData &TD, /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. 
-static Constant *computePointerDifference(const TargetData &TD, +static Constant *computePointerDifference(const DataLayout &TD, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); if (!LHSOffset) @@ -880,7 +880,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), RecursionLimit); @@ -951,7 +951,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1039,7 +1039,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1055,7 +1055,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1074,7 +1074,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1144,7 +1144,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1160,7 +1160,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1179,7 +1179,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, return 0; } -Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1248,7 +1248,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData 
*TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), RecursionLimit); @@ -1275,7 +1275,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), @@ -1307,7 +1307,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), @@ -1407,7 +1407,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1501,7 +1501,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1561,7 +1561,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, return 0; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); @@ -1591,7 +1591,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } -static Constant *computePointerICmp(const TargetData &TD, +static Constant *computePointerICmp(const DataLayout &TD, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { // We can only fold certain predicates on pointer comparisons. @@ -2065,8 +2065,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem && NoRHSWrapProblem) { // Determine Y and Z in the form icmp (X+Y), (X+Z). - Value *Y = (A == C || A == D) ? B : A; - Value *Z = (C == A || C == B) ? 
D : C; + Value *Y, *Z; + if (A == C) { + // C + B == C + D -> B == D + Y = B; + Z = D; + } else if (A == D) { + // D + B == C + D -> B == C + Y = B; + Z = C; + } else if (B == C) { + // A + C == C + D -> A == D + Y = A; + Z = D; + } else { + assert(B == D); + // A + D == C + D -> A == C + Y = A; + Z = C; + } if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1)) return V; } @@ -2399,7 +2416,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), @@ -2496,7 +2513,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), @@ -2531,7 +2548,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT), @@ -2579,7 +2596,7 @@ static Value *SimplifyGEPInst(ArrayRef Ops, const Query &Q, unsigned) { return ConstantExpr::getGetElementPtr(cast(Ops[0]), Ops.slice(1)); } -Value *llvm::SimplifyGEPInst(ArrayRef Ops, const TargetData *TD, +Value *llvm::SimplifyGEPInst(ArrayRef Ops, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit); @@ -2616,7 +2633,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT), @@ -2664,7 +2681,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { return 0; } -Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD, +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit); @@ -2730,7 +2747,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); } @@ -2745,7 +2762,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); @@ -2761,7 +2778,7 @@ static Value *SimplifyCallInst(CallInst *CI, const Query &) { /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. 
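One detail from the icmp hunk earlier in this file's diff deserves a standalone illustration: the Y/Z cancellation for icmp (X+Y), (X+Z) is guarded by NoLHSWrapProblem/NoRHSWrapProblem because, while equality survives wraparound, ordered predicates do not. A minimal plain C++ check (not part of the patch; values invented):

#include <cassert>

int main() {
  // With 8-bit wrapping arithmetic, cancelling X from an ordered
  // comparison changes the answer:
  unsigned char X = 200, Y = 100, Z = 10;
  unsigned char XY = X + Y;   // 300 mod 256 == 44
  unsigned char XZ = X + Z;   // 210
  assert(XY < XZ);            // icmp ult (X+Y), (X+Z) is true...
  assert(!(Y < Z));           // ...but icmp ult Y, Z is false.
  return 0;
}
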
-Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, +Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { Value *Result; @@ -2881,7 +2898,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { bool Simplified = false; @@ -2936,14 +2953,14 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, } bool llvm::recursivelySimplifyInstruction(Instruction *I, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9140786..2b87d80 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -13,13 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lazy-value-info" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ConstantRange.h" @@ -212,7 +213,7 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. + // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. if (ConstantInt *Res = dyn_cast( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getConstant(), @@ -238,7 +239,7 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. + // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding. if (ConstantInt *Res = dyn_cast( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getNotConstant(), @@ -294,7 +295,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { //===----------------------------------------------------------------------===// namespace { - /// LVIValueHandle - A callback value handle update the cache when + /// LVIValueHandle - A callback value handle updates the cache when /// values are erased. 
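A minimal sketch of the CallbackVH idiom the comment above describes (assuming llvm/Support/ValueHandle.h; SimpleCache and its erase member are hypothetical, invented for illustration):

// The handle evicts its value's cache entries when LLVM destroys the value
// or replaces all of its uses.
struct CacheEvictingVH : public CallbackVH {
  SimpleCache *Cache;   // hypothetical cache kept consistent by the handle
  CacheEvictingVH(Value *V, SimpleCache *C) : CallbackVH(V), Cache(C) {}
  virtual void deleted() {
    Cache->erase(getValPtr());   // drop entries keyed by the dead value
    setValPtr(NULL);             // sever the handle itself
  }
  virtual void allUsesReplacedWith(Value *) {
    Cache->erase(getValPtr());   // conservatively invalidate on RAUW
  }
};
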
class LazyValueInfoCache; struct LVIValueHandle : public CallbackVH { @@ -470,8 +471,10 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { return true; LVIValueHandle ValHandle(Val, this); - if (!ValueCache.count(ValHandle)) return false; - return ValueCache[ValHandle].count(BB); + std::map::iterator I = + ValueCache.find(ValHandle); + if (I == ValueCache.end()) return false; + return I->second.count(BB); } LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { @@ -555,13 +558,11 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (LoadInst *L = dyn_cast(I)) { return L->getPointerAddressSpace() == 0 && - GetUnderlyingObject(L->getPointerOperand()) == - GetUnderlyingObject(Ptr); + GetUnderlyingObject(L->getPointerOperand()) == Ptr; } if (StoreInst *S = dyn_cast(I)) { return S->getPointerAddressSpace() == 0 && - GetUnderlyingObject(S->getPointerOperand()) == - GetUnderlyingObject(Ptr); + GetUnderlyingObject(S->getPointerOperand()) == Ptr; } if (MemIntrinsic *MI = dyn_cast(I)) { if (MI->isVolatile()) return false; @@ -571,11 +572,11 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (!Len || Len->isZero()) return false; if (MI->getDestAddressSpace() == 0) - if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) + if (GetUnderlyingObject(MI->getRawDest()) == Ptr) return true; if (MemTransferInst *MTI = dyn_cast(MI)) if (MTI->getSourceAddressSpace() == 0) - if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr) + if (GetUnderlyingObject(MTI->getRawSource()) == Ptr) return true; } return false; @@ -589,13 +590,19 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, // then we know that the pointer can't be NULL. bool NotNull = false; if (Val->getType()->isPointerTy()) { - if (isa(Val)) { + if (isKnownNonNull(Val)) { NotNull = true; } else { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ - if (InstructionDereferencesPointer(BI, Val)) { - NotNull = true; - break; + Value *UnderlyingVal = GetUnderlyingObject(Val); + // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge + // inside InstructionDereferencesPointer either. + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + if (InstructionDereferencesPointer(BI, UnderlyingVal)) { + NotNull = true; + break; + } } } } @@ -845,9 +852,12 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { ConstantRange EdgeVal(i.getCaseValue()->getValue()); - if (DefaultCase) - EdgesVals = EdgesVals.difference(EdgeVal); - else if (i.getCaseSuccessor() == BBTo) + if (DefaultCase) { + // It is possible that the default destination is the destination of + // some cases. There is no need to perform difference for those cases. + if (i.getCaseSuccessor() != BBTo) + EdgesVals = EdgesVals.difference(EdgeVal); + } else if (i.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); } Result = LVILatticeVal::getRange(EdgesVals); @@ -1004,7 +1014,7 @@ bool LazyValueInfo::runOnFunction(Function &F) { if (PImpl) getCache(PImpl).clear(); - TD = getAnalysisIfAvailable(); + TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); // Fully lazy. 
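The hasBlockValue change above is a general pattern worth noting: count() followed by operator[] performs two lookups, and operator[] default-constructs an entry for a missing key. A standalone illustration with std::map (plain C++, independent of LLVM):

#include <cassert>
#include <map>
#include <string>

bool hasEntryTwoLookups(std::map<std::string, int> &M, const std::string &K) {
  if (!M.count(K)) return false;   // first lookup
  return M[K] != 0;                // second lookup; would insert if absent
}

bool hasEntryOneLookup(const std::map<std::string, int> &M,
                       const std::string &K) {
  std::map<std::string, int>::const_iterator I = M.find(K);  // single lookup
  return I != M.end() && I->second != 0;                     // never inserts
}

int main() {
  std::map<std::string, int> M;
  M["a"] = 1;
  assert(hasEntryTwoLookups(M, "a") && hasEntryOneLookup(M, "a"));
  assert(!hasEntryOneLookup(M, "b"));  // "b" is not inserted as a side effect
  return 0;
}
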
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 83bdf52..6d6d580 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -43,7 +43,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" @@ -103,7 +103,7 @@ namespace { Module *Mod; AliasAnalysis *AA; DominatorTree *DT; - TargetData *TD; + DataLayout *TD; TargetLibraryInfo *TLI; std::string Messages; @@ -177,7 +177,7 @@ bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); AA = &getAnalysis(); DT = &getAnalysis(); - TD = getAnalysisIfAvailable(); + TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); visit(F); dbgs() << MessagesStr.str(); @@ -411,14 +411,50 @@ void Lint::visitMemoryReference(Instruction &I, "Undefined behavior: Branch to non-blockaddress", &I); } + // Check for buffer overflows and misalignment. if (TD) { - if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast(Base)) { + Type *ATy = AI->getAllocatedType(); + if (!AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); + } + } - if (Align != 0) { - unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD); - Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. + if (Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } } @@ -470,7 +506,7 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } -static bool isZero(Value *V, TargetData *TD) { +static bool isZero(Value *V, DataLayout *TD) { // Assume undef could be zero. 
   if (isa<UndefValue>(V))
     return true;
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 873a275..73aa8b4 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -13,7 +13,7 @@
 
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/IntrinsicInst.h"
@@ -52,8 +52,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 /// bitcasts to get back to the underlying object being addressed, keeping
 /// track of the offset in bytes from the GEPs relative to the result.
 /// This is closely related to GetUnderlyingObject but is located
-/// here to avoid making VMCore depend on TargetData.
-static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+/// here to avoid making VMCore depend on DataLayout.
+static Value *getUnderlyingObjectWithOffset(Value *V, const DataLayout *TD,
                                             uint64_t &ByteOffset,
                                             unsigned MaxLookup = 6) {
   if (!V->getType()->isPointerTy())
@@ -85,7 +85,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
 /// specified pointer, we do a quick local scan of the basic block containing
 /// ScanFrom, to determine if the address is already accessed.
 bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
-                                       unsigned Align, const TargetData *TD) {
+                                       unsigned Align, const DataLayout *TD) {
   uint64_t ByteOffset = 0;
   Value *Base = V;
   if (TD)
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
deleted file mode 100644
index 463269d..0000000
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the (beginning) of an implementation of a loop dependence analysis
-// framework, which is used to detect dependences in memory accesses in loops.
-//
-// Please note that this is work in progress and the interface is subject to
-// change.
-//
-// TODO: adapt as implementation progresses.
-// -// TODO: document lingo (pair, subscript, index) -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lda" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LoopDependenceAnalysis.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Instructions.h" -#include "llvm/Operator.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" -using namespace llvm; - -STATISTIC(NumAnswered, "Number of dependence queries answered"); -STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed"); -STATISTIC(NumDependent, "Number of pairs with dependent accesses"); -STATISTIC(NumIndependent, "Number of pairs with independent accesses"); -STATISTIC(NumUnknown, "Number of pairs with unknown accesses"); - -LoopPass *llvm::createLoopDependenceAnalysisPass() { - return new LoopDependenceAnalysis(); -} - -INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda", - "Loop Dependence Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda", - "Loop Dependence Analysis", false, true) -char LoopDependenceAnalysis::ID = 0; - -//===----------------------------------------------------------------------===// -// Utility Functions -//===----------------------------------------------------------------------===// - -static inline bool IsMemRefInstr(const Value *V) { - const Instruction *I = dyn_cast(V); - return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); -} - -static void GetMemRefInstrs(const Loop *L, - SmallVectorImpl &Memrefs) { - for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); - b != be; ++b) - for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); - i != ie; ++i) - if (IsMemRefInstr(i)) - Memrefs.push_back(i); -} - -static bool IsLoadOrStoreInst(Value *I) { - // Returns true if the load or store can be analyzed. Atomic and volatile - // operations have properties which this analysis does not understand. 
- if (LoadInst *LI = dyn_cast(I)) - return LI->isUnordered(); - else if (StoreInst *SI = dyn_cast(I)) - return SI->isUnordered(); - return false; -} - -static Value *GetPointerOperand(Value *I) { - if (LoadInst *i = dyn_cast(I)) - return i->getPointerOperand(); - if (StoreInst *i = dyn_cast(I)) - return i->getPointerOperand(); - llvm_unreachable("Value is no load or store instruction!"); -} - -static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, - const Value *A, - const Value *B) { - const Value *aObj = GetUnderlyingObject(A); - const Value *bObj = GetUnderlyingObject(B); - return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), - bObj, AA->getTypeStoreSize(bObj->getType())); -} - -static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) { - return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L); -} - -//===----------------------------------------------------------------------===// -// Dependence Testing -//===----------------------------------------------------------------------===// - -bool LoopDependenceAnalysis::isDependencePair(const Value *A, - const Value *B) const { - return IsMemRefInstr(A) && - IsMemRefInstr(B) && - (cast(A)->mayWriteToMemory() || - cast(B)->mayWriteToMemory()); -} - -bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, - Value *B, - DependencePair *&P) { - void *insertPos = 0; - FoldingSetNodeID id; - id.AddPointer(A); - id.AddPointer(B); - - P = Pairs.FindNodeOrInsertPos(id, insertPos); - if (P) return true; - - P = new (PairAllocator) DependencePair(id, A, B); - Pairs.InsertNode(P, insertPos); - return false; -} - -void LoopDependenceAnalysis::getLoops(const SCEV *S, - DenseSet* Loops) const { - // Refactor this into an SCEVVisitor, if efficiency becomes a concern. - for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) - if (!SE->isLoopInvariant(S, L)) - Loops->insert(L); -} - -bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const { - DenseSet loops; - getLoops(S, &loops); - return loops.empty(); -} - -bool LoopDependenceAnalysis::isAffine(const SCEV *S) const { - const SCEVAddRecExpr *rec = dyn_cast(S); - return isLoopInvariant(S) || (rec && rec->isAffine()); -} - -bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const { - return isLoopInvariant(A) && isLoopInvariant(B); -} - -bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const { - DenseSet loops; - getLoops(A, &loops); - getLoops(B, &loops); - return loops.size() == 1; -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseZIV(const SCEV *A, - const SCEV *B, - Subscript *S) const { - assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!"); - return A == B ? Dependent : Independent; -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseSIV(const SCEV *A, - const SCEV *B, - Subscript *S) const { - return Unknown; // TODO: Implement. -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseMIV(const SCEV *A, - const SCEV *B, - Subscript *S) const { - return Unknown; // TODO: Implement. -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseSubscript(const SCEV *A, - const SCEV *B, - Subscript *S) const { - DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n"); - - if (A == B) { - DEBUG(dbgs() << " -> [D] same SCEV\n"); - return Dependent; - } - - if (!isAffine(A) || !isAffine(B)) { - DEBUG(dbgs() << " -> [?] 
not affine\n"); - return Unknown; - } - - if (isZIVPair(A, B)) - return analyseZIV(A, B, S); - - if (isSIVPair(A, B)) - return analyseSIV(A, B, S); - - return analyseMIV(A, B, S); -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analysePair(DependencePair *P) const { - DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n"); - - // We only analyse loads and stores but no possible memory accesses by e.g. - // free, call, or invoke instructions. - if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) { - DEBUG(dbgs() << "--> [?] no load/store\n"); - return Unknown; - } - - Value *aPtr = GetPointerOperand(P->A); - Value *bPtr = GetPointerOperand(P->B); - - switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) { - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - // We can not analyse objects if we do not know about their aliasing. - DEBUG(dbgs() << "---> [?] may alias\n"); - return Unknown; - - case AliasAnalysis::NoAlias: - // If the objects noalias, they are distinct, accesses are independent. - DEBUG(dbgs() << "---> [I] no alias\n"); - return Independent; - - case AliasAnalysis::MustAlias: - break; // The underlying objects alias, test accesses for dependence. - } - - const GEPOperator *aGEP = dyn_cast(aPtr); - const GEPOperator *bGEP = dyn_cast(bPtr); - - if (!aGEP || !bGEP) - return Unknown; - - // FIXME: Is filtering coupled subscripts necessary? - - // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding - // trailing zeroes to the smaller GEP, if needed. - typedef SmallVector, 4> GEPOpdPairsTy; - GEPOpdPairsTy opds; - for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(), - aEnd = aGEP->idx_end(), - bIdx = bGEP->idx_begin(), - bEnd = bGEP->idx_end(); - aIdx != aEnd && bIdx != bEnd; - aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) { - const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE); - const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE); - opds.push_back(std::make_pair(aSCEV, bSCEV)); - } - - if (!opds.empty() && opds[0].first != opds[0].second) { - // We cannot (yet) handle arbitrary GEP pointer offsets. By limiting - // - // TODO: this could be relaxed by adding the size of the underlying object - // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we - // know that x is a [100 x i8]*, we could modify the first subscript to be - // (i, 200-i) instead of (i, -i). - return Unknown; - } - - // Now analyse the collected operand pairs (skipping the GEP ptr offsets). - for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end(); - i != end; ++i) { - Subscript subscript; - DependenceResult result = analyseSubscript(i->first, i->second, &subscript); - if (result != Dependent) { - // We either proved independence or failed to analyse this subscript. - // Further subscripts will not improve the situation, so abort early. - return result; - } - P->Subscripts.push_back(subscript); - } - // We successfully analysed all subscripts but failed to prove independence. - return Dependent; -} - -bool LoopDependenceAnalysis::depends(Value *A, Value *B) { - assert(isDependencePair(A, B) && "Values form no dependence pair!"); - ++NumAnswered; - - DependencePair *p; - if (!findOrInsertDependencePair(A, B, p)) { - // The pair is not cached, so analyse it. 
- ++NumAnalysed; - switch (p->Result = analysePair(p)) { - case Dependent: ++NumDependent; break; - case Independent: ++NumIndependent; break; - case Unknown: ++NumUnknown; break; - } - } - return p->Result != Independent; -} - -//===----------------------------------------------------------------------===// -// LoopDependenceAnalysis Implementation -//===----------------------------------------------------------------------===// - -bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { - this->L = L; - AA = &getAnalysis(); - SE = &getAnalysis(); - return false; -} - -void LoopDependenceAnalysis::releaseMemory() { - Pairs.clear(); - PairAllocator.Reset(); -} - -void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); -} - -static void PrintLoopInfo(raw_ostream &OS, - LoopDependenceAnalysis *LDA, const Loop *L) { - if (!L->empty()) return; // ignore non-innermost loops - - SmallVector memrefs; - GetMemRefInstrs(L, memrefs); - - OS << "Loop at depth " << L->getLoopDepth() << ", header block: "; - WriteAsOperand(OS, L->getHeader(), false); - OS << "\n"; - - OS << " Load/store instructions: " << memrefs.size() << "\n"; - for (SmallVector::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) - OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n"; - - OS << " Pairwise dependence results:\n"; - for (SmallVector::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) - for (SmallVector::const_iterator y = x + 1; - y != end; ++y) - if (LDA->isDependencePair(*x, *y)) - OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) - << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent") - << "\n"; -} - -void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { - // TODO: doc why const_cast is safe - PrintLoopInfo(OS, const_cast(this), this->L); -} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 20c33a3..8341f9d 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -306,9 +306,11 @@ BasicBlock *Loop::getUniqueExitBlock() const { return 0; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void Loop::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // UnloopUpdater implementation @@ -429,8 +431,8 @@ void UnloopUpdater::updateSubloopParents() { Unloop->removeChildLoop(llvm::prior(Unloop->end())); assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); - if (SubloopParents[Subloop]) - SubloopParents[Subloop]->addChildLoop(Subloop); + if (Loop *Parent = SubloopParents[Subloop]) + Parent->addChildLoop(Subloop); else LI->addTopLevelLoop(Subloop); } @@ -456,9 +458,8 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { assert(Subloop && "subloop is not an ancestor of the original loop"); } // Get the current nearest parent of the Subloop exits, initially Unloop. 
- if (!SubloopParents.count(Subloop)) - SubloopParents[Subloop] = Unloop; - NearLoop = SubloopParents[Subloop]; + NearLoop = + SubloopParents.insert(std::make_pair(Subloop, Unloop)).first->second; } succ_iterator I = succ_begin(BB), E = succ_end(BB); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index e77d2ff..0a539fe 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -25,7 +25,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -39,7 +40,7 @@ enum AllocType { }; struct AllocFnsTy { - const char *Name; + LibFunc::Func Func; AllocType AllocTy; unsigned char NumParams; // First and Second size parameters (or -1 if unused) @@ -49,22 +50,22 @@ struct AllocFnsTy { // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. static const AllocFnsTy AllocationFnData[] = { - {"malloc", MallocLike, 1, 0, -1}, - {"valloc", MallocLike, 1, 0, -1}, - {"_Znwj", MallocLike, 1, 0, -1}, // new(unsigned int) - {"_ZnwjRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) - {"_Znwm", MallocLike, 1, 0, -1}, // new(unsigned long) - {"_ZnwmRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) - {"_Znaj", MallocLike, 1, 0, -1}, // new[](unsigned int) - {"_ZnajRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) - {"_Znam", MallocLike, 1, 0, -1}, // new[](unsigned long) - {"_ZnamRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) - {"posix_memalign", MallocLike, 3, 2, -1}, - {"calloc", CallocLike, 2, 0, 1}, - {"realloc", ReallocLike, 2, 1, -1}, - {"reallocf", ReallocLike, 2, 1, -1}, - {"strdup", StrDupLike, 1, -1, -1}, - {"strndup", StrDupLike, 2, 1, -1} + {LibFunc::malloc, MallocLike, 1, 0, -1}, + {LibFunc::valloc, MallocLike, 1, 0, -1}, + {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long) + {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) + {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long) + {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::posix_memalign, MallocLike, 3, 2, -1}, + {LibFunc::calloc, CallocLike, 2, 0, 1}, + {LibFunc::realloc, ReallocLike, 2, 1, -1}, + {LibFunc::reallocf, ReallocLike, 2, 1, -1}, + {LibFunc::strdup, StrDupLike, 1, -1, -1}, + {LibFunc::strndup, StrDupLike, 2, 1, -1} }; @@ -85,15 +86,22 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { /// \brief Returns the allocation data for the given value if it is a call to a /// known allocation function, and NULL otherwise. static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, + const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; + // Make sure that the function is available. 
+ StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + unsigned i = 0; bool found = false; for ( ; i < array_lengthof(AllocationFnData); ++i) { - if (Callee->getName() == AllocationFnData[i].Name) { + if (AllocationFnData[i].Func == TLIFn) { found = true; break; } @@ -106,7 +114,6 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, return 0; // Check function prototype. - // FIXME: Check the nobuiltin metadata?? (PR5130) int FstParam = FnData->FstParam; int SndParam = FnData->SndParam; FunctionType *FTy = Callee->getFunctionType(); @@ -125,64 +132,72 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); - return CS && CS.hasFnAttr(Attribute::NoAlias); + return CS && CS.hasFnAttr(Attributes::NoAlias); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). -bool llvm::isAllocationFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, AnyAlloc, LookThroughBitCast); +bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). -bool llvm::isNoAliasFn(const Value *V, bool LookThroughBitCast) { +bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { // it's safe to consider realloc as noalias since accessing the original // pointer is undefined behavior - return isAllocationFn(V, LookThroughBitCast) || + return isAllocationFn(V, TLI, LookThroughBitCast) || hasNoAliasAttr(V, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). -bool llvm::isMallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, MallocLike, LookThroughBitCast); +bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). -bool llvm::isCallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, CallocLike, LookThroughBitCast); +bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, CallocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). -bool llvm::isAllocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, AllocLike, LookThroughBitCast); +bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// reallocates memory (such as realloc). 
-bool llvm::isReallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, LookThroughBitCast); +bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); } /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I) { - return isMallocLikeFn(I) ? dyn_cast(I) : 0; +const CallInst *llvm::extractMallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isMallocLikeFn(I, TLI) ? dyn_cast(I) : 0; } -static Value *computeArraySize(const CallInst *CI, const TargetData *TD, +static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) return NULL; // The size of the malloc's result type must be known to determine array size. - Type *T = getMallocAllocatedType(CI); + Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) return NULL; @@ -204,9 +219,11 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD, /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. -const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { - const CallInst *CI = extractMallocCall(I); - Value *ArraySize = computeArraySize(CI, TD); +const CallInst *llvm::isArrayMalloc(const Value *I, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + const CallInst *CI = extractMallocCall(I, TLI); + Value *ArraySize = computeArraySize(CI, TD, TLI); if (ArraySize && ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) @@ -221,8 +238,9 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { /// 0: PointerType is the calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -PointerType *llvm::getMallocType(const CallInst *CI) { - assert(isMallocLikeFn(CI) && "getMallocType and not malloc call"); +PointerType *llvm::getMallocType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; @@ -252,8 +270,9 @@ PointerType *llvm::getMallocType(const CallInst *CI) { /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -Type *llvm::getMallocAllocatedType(const CallInst *CI) { - PointerType *PT = getMallocType(CI); +Type *llvm::getMallocAllocatedType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + PointerType *PT = getMallocType(CI, TLI); return PT ? PT->getElementType() : NULL; } @@ -262,22 +281,24 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI) { /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. 
-Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, +Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt) { - assert(isMallocLikeFn(CI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, TD, LookThroughSExt); + assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); + return computeArraySize(CI, TD, TLI, LookThroughSExt); } /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. -const CallInst *llvm::extractCallocCall(const Value *I) { - return isCallocLikeFn(I) ? cast(I) : 0; +const CallInst *llvm::extractCallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isCallocLikeFn(I, TLI) ? cast(I) : 0; } /// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *llvm::isFreeCall(const Value *I) { +const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast(I); if (!CI) return 0; @@ -285,9 +306,14 @@ const CallInst *llvm::isFreeCall(const Value *I) { if (Callee == 0 || !Callee->isDeclaration()) return 0; - if (Callee->getName() != "free" && - Callee->getName() != "_ZdlPv" && // operator delete(void*) - Callee->getName() != "_ZdaPv") // operator delete[](void*) + StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + + if (TLIFn != LibFunc::free && + TLIFn != LibFunc::ZdlPv && // operator delete(void*) + TLIFn != LibFunc::ZdaPv) // operator delete[](void*) return 0; // Check free prototype. @@ -315,12 +341,12 @@ const CallInst *llvm::isFreeCall(const Value *I) { /// object size in Size if successful, and false otherwise. /// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, /// byval arguments, and global variables. -bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD, - bool RoundToAlign) { +bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, + const TargetLibraryInfo *TLI, bool RoundToAlign) { if (!TD) return false; - ObjectSizeOffsetVisitor Visitor(TD, Ptr->getContext(), RoundToAlign); + ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); SizeOffsetType Data = Visitor.compute(const_cast(Ptr)); if (!Visitor.bothKnown(Data)) return false; @@ -347,10 +373,11 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { return Size; } -ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, + const TargetLibraryInfo *TLI, LLVMContext &Context, bool RoundToAlign) -: TD(TD), RoundToAlign(RoundToAlign) { +: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) { IntegerType *IntTy = TD->getIntPtrType(Context); IntTyBits = IntTy->getBitWidth(); Zero = APInt::getNullValue(IntTyBits); @@ -358,11 +385,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. 
+ if (!SeenInsts.insert(I)) + return unknown(); - if (GEPOperator *GEP = dyn_cast(V)) - return visitGEPOperator(*GEP); - if (Instruction *I = dyn_cast(V)) + if (GEPOperator *GEP = dyn_cast(V)) + return visitGEPOperator(*GEP); return visit(*I); + } if (Argument *A = dyn_cast(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast(V)) @@ -371,9 +403,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast(V)) return visitUndefValue(*UV); - if (ConstantExpr *CE = dyn_cast(V)) + if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast(*CE)); + } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V << '\n'); @@ -408,7 +443,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { } SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { - const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc, + TLI); if (!FnData) return unknown(); @@ -473,10 +509,6 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { } SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { - // Ignore self-referencing GEPs, they can occur in unreachable code. - if (&GEP == GEP.getPointerOperand()) - return unknown(); - SizeOffsetType PtrData = compute(GEP.getPointerOperand()); if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices()) return unknown(); @@ -510,10 +542,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { - // ignore malformed self-looping selects - if (I.getTrueValue() == &I || I.getFalseValue() == &I) - return unknown(); - SizeOffsetType TrueSide = compute(I.getTrueValue()); SizeOffsetType FalseSide = compute(I.getFalseValue()); if (bothKnown(TrueSide) && bothKnown(FalseSide) && TrueSide == FalseSide) @@ -531,10 +559,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { } -ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD, +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD, + const TargetLibraryInfo *TLI, LLVMContext &Context) -: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)), -Visitor(TD, Context) { +: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) { IntTy = TD->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } @@ -559,6 +587,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { + ObjectSizeOffsetVisitor Visitor(TD, TLI, Context); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), @@ -621,7 +650,8 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { - const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc, + TLI); if (!FnData) return unknown(); @@ -719,10 +749,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { - // 
ignore malformed self-looping selects - if (I.getTrueValue() == &I || I.getFalseValue() == &I) - return unknown(); - SizeOffsetEvalType TrueSide = compute_(I.getTrueValue()); SizeOffsetEvalType FalseSide = compute_(I.getFalseValue()); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 059e574..9872890 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -30,7 +30,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/PredIteratorCache.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); @@ -89,7 +89,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis(); - TD = getAnalysisIfAvailable(); + TD = getAnalysisIfAvailable(); DT = getAnalysisIfAvailable(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); @@ -148,7 +148,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, return AliasAnalysis::ModRef; } - if (const CallInst *CI = isFreeCall(Inst)) { + if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { // calls to free() deallocate the entire structure Loc = AliasAnalysis::Location(CI->getArgOperand(0)); return AliasAnalysis::Mod; @@ -256,7 +256,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, const LoadInst *LI, - const TargetData *TD) { + const DataLayout *TD) { // If we have no target data, we can't do this. if (TD == 0) return false; @@ -280,7 +280,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, unsigned MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, const LoadInst *LI, - const TargetData &TD) { + const DataLayout &TD) { // We can only extend simple integer loads. if (!isa(LI->getType()) || !LI->isSimple()) return 0; @@ -327,12 +327,12 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) { + LI->getParent()->getParent()->getFnAttributes(). + hasAttribute(Attributes::AddressSafety)) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. return 0; - } // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) @@ -479,12 +479,20 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - if (isa(Inst) || isNoAliasFn(Inst)) { + const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); + if (isa(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); - continue; + // Be conservative if the accessed pointer may alias the allocation. 
+ if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias) + return MemDepResult::getClobber(Inst); + // If the allocation is not aliased and does not read memory (like + // strdup), it is safe to ignore. + if (isa(Inst) || + isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI)) + continue; } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. @@ -975,7 +983,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { Visited.insert(std::make_pair(I->getBB(), Addr)); - if (!I->getResult().isNonLocal()) + if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB())) Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); } ++NumCacheCompleteNonLocalPtr; @@ -1021,7 +1029,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. - if (!Dep.isNonLocal()) { + if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 101c2d5..2eb4137 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; namespace { @@ -36,7 +36,7 @@ namespace { virtual void initializePass() { // Note: NoAA does not call InitializeAliasAnalysis because it's // special and does not support chaining. - TD = getAnalysisIfAvailable(); + TD = getAnalysisIfAvailable(); } virtual AliasResult alias(const Location &LocA, const Location &LocB) { diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 38cb1c9..c35737e 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -41,6 +41,7 @@ static bool CanPHITrans(Instruction *Inst) { return false; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void PHITransAddr::dump() const { if (Addr == 0) { dbgs() << "PHITransAddr: null\n"; @@ -50,6 +51,7 @@ void PHITransAddr::dump() const { for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; } +#endif static bool VerifySubExpr(Value *Expr, diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp new file mode 100644 index 0000000..a4f634a --- /dev/null +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -0,0 +1,155 @@ +//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load raw profiling data from the dump +// file. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Module.h" +#include "llvm/InstrTypes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Analysis/ProfileDataTypes.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include +#include +using namespace llvm; + +raw_ostream &llvm::operator<<(raw_ostream &O, std::pair E) { + O << "("; + + if (E.first) + O << E.first->getName(); + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second->getName(); + else + O << "0"; + + return O << ")"; +} + +/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one +/// (or both) may not be defined. +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + // Undefined + undefined = undefined. + if (A == ProfileDataLoader::Uncounted) return B; + if (B == ProfileDataLoader::Uncounted) return A; + + return A + B; +} + +/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' +template +static void ReadProfilingData(const char *ToolName, FILE *F, + T *Data, size_t NumEntries) { + // Read in the block of data... + if (fread(Data, sizeof(T), NumEntries, F) != NumEntries) + report_fatal_error(Twine(ToolName) + ": Profiling data truncated"); +} + +/// ReadProfilingNumEntries - Read how many entries are in this profiling data +/// packet. +static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F, + bool ShouldByteSwap) { + unsigned Entry; + ReadProfilingData(ToolName, F, &Entry, 1); + return ShouldByteSwap ? ByteSwap_32(Entry) : Entry; +} + +/// ReadProfilingBlock - Read the number of entries in the next profiling data +/// packet and then accumulate the entries into 'Data'. +static void ReadProfilingBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + SmallVector &Data) { + // Read the number of entries... + unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); + + // Read in the data. + SmallVector TempSpace(NumEntries); + ReadProfilingData(ToolName, F, TempSpace.data(), NumEntries); + + // Make sure we have enough space ... + if (Data.size() < NumEntries) + Data.resize(NumEntries, ProfileDataLoader::Uncounted); + + // Accumulate the data we just read into the existing data. + for (unsigned i = 0; i < NumEntries; ++i) { + unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i]; + Data[i] = AddCounts(Entry, Data[i]); + } +} + +/// ReadProfilingArgBlock - Read the command line arguments that the progam was +/// run with when the current profiling data packet(s) were generated. +static void ReadProfilingArgBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + SmallVector &CommandLines) { + // Read the number of bytes ... + unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); + + // Read in the arguments (if there are any to read). Round up the length to + // the nearest 4-byte multiple. + SmallVector Args(ArgLength+4); + if (ArgLength) + ReadProfilingData(ToolName, F, Args.data(), (ArgLength+3) & ~3); + + // Store the arguments. + CommandLines.push_back(std::string(&Args[0], &Args[ArgLength])); +} + +const unsigned ProfileDataLoader::Uncounted = ~0U; + +/// ProfileDataLoader ctor - Read the specified profiling data file, reporting +/// a fatal error if the file is invalid or broken. 
+ProfileDataLoader::ProfileDataLoader(const char *ToolName, + const std::string &Filename) + : Filename(Filename) { + FILE *F = fopen(Filename.c_str(), "rb"); + if (F == 0) + report_fatal_error(Twine(ToolName) + ": Error opening '" + + Filename + "': "); + + // Keep reading packets until we run out of them. + unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. This can happen when the compiler host and target have + // different endianness. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType; + + switch (PacketType) { + case ArgumentInfo: + ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + default: + report_fatal_error(std::string(ToolName) + + ": Unknown profiling packet type"); + break; + } + } + + fclose(F); +} diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp new file mode 100644 index 0000000..c43cff0 --- /dev/null +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -0,0 +1,188 @@ +//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loads profiling data from a dump file and sets branch weight +// metadata. +// +// TODO: Replace all "profile-metadata-loader" strings with "profile-loader" +// once ProfileInfo etc. has been removed. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-metadata-loader" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BasicBlock.h" +#include "llvm/InstrTypes.h" +#include "llvm/Module.h" +#include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumEdgesRead, "The # of edges read."); +STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated."); + +static cl::opt +ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Profile file loaded by -profile-metadata-loader")); + +namespace { + /// This pass loads profiling data from a dump file and sets branch weight + /// metadata. 
+ class ProfileMetadataLoaderPass : public ModulePass { + std::string Filename; + public: + static char ID; // Class identification, replacement for typeinfo + explicit ProfileMetadataLoaderPass(const std::string &filename = "") + : ModulePass(ID), Filename(filename) { + initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry()); + if (filename.empty()) Filename = ProfileMetadataFilename; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "Profile loader"; + } + + virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge, + ArrayRef); + virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef); + virtual void setBranchWeightMetadata(Module&, ProfileData&); + + virtual bool runOnModule(Module &M); + }; +} // End of anonymous namespace + +char ProfileMetadataLoaderPass::ID = 0; +INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader", + "Load profile information from llvmprof.out", false, true) +INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader", + "Load profile information from llvmprof.out", false, true) + +char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID; + +/// createProfileMetadataLoaderPass - This function returns a Pass that loads +/// the profiling information for the module from the specified filename, +/// making it available to the optimizers. +ModulePass *llvm::createProfileMetadataLoaderPass() { + return new ProfileMetadataLoaderPass(); +} +ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) { + return new ProfileMetadataLoaderPass(Filename); +} + +/// readEdge - Take the value from a profile counter and assign it to an edge. +void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount, + ProfileData &PB, ProfileData::Edge e, + ArrayRef Counters) { + if (ReadCount >= Counters.size()) return; + + unsigned weight = Counters[ReadCount]; + assert(weight != ProfileDataLoader::Uncounted); + PB.addEdgeWeight(e, weight); + + DEBUG(dbgs() << "-- Read Edge Counter for " << e + << " (# "<< (ReadCount) << "): " + << PB.getEdgeWeight(e) << "\n"); +} + +/// matchEdges - Link every profile counter with an edge. +unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB, + ArrayRef Counters) { + if (Counters.size() == 0) return 0; + + unsigned ReadCount = 0; + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n"); + readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)), + Counters); + } + } + } + + return ReadCount; +} + +/// setBranchWeightMetadata - Translate the counter values associated with each +/// edge into branch weights for each conditional branch (a branch with 2 or +/// more desinations). 
+void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M, + ProfileData &PB) { + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n"); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + unsigned NumSuccessors = TI->getNumSuccessors(); + + // If there is only one successor then we can not set a branch + // probability as the target is certain. + if (NumSuccessors < 2) continue; + + // Load the weights of all edges leading from this terminator. + DEBUG(dbgs() << "-- Terminator with " << NumSuccessors + << " successors:\n"); + SmallVector Weights(NumSuccessors); + for (unsigned s = 0 ; s < NumSuccessors ; ++s) { + ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s)); + Weights[s] = (uint32_t)PB.getEdgeWeight(edge); + DEBUG(dbgs() << "---- Edge '" << edge << "' has weight " + << Weights[s] << "\n"); + } + + // Set branch weight metadata. This will set branch probabilities of + // 100%/0% if that is true of the dynamic execution. + // BranchProbabilityInfo can account for this when it loads this metadata + // (it gives the unexectuted branch a weight of 1 for the purposes of + // probability calculations). + MDBuilder MDB(TI->getContext()); + MDNode *Node = MDB.createBranchWeights(Weights); + TI->setMetadata(LLVMContext::MD_prof, Node); + NumTermsAnnotated++; + } + } +} + +bool ProfileMetadataLoaderPass::runOnModule(Module &M) { + ProfileDataLoader PDL("profile-data-loader", Filename); + ProfileData PB; + + ArrayRef Counters = PDL.getRawEdgeCounts(); + + unsigned ReadCount = matchEdges(M, PB, Counters); + + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + + setBranchWeightMetadata(M, PB); + + return ReadCount > 0; +} diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 63468f8..12b59e0 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -286,7 +286,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { } } - double fraction = floor(BBWeight/Edges.size()); + double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0; // Finally we know what flow is still not leaving the block, distribute this // flow onto the empty edges. 
for (SmallVector::iterator ei = Edges.begin(), ee = Edges.end(); diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 173de2c..b5b7ac1 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -1016,40 +1016,14 @@ void ProfileInfoT::repair(const Function *F) { } } -raw_ostream& operator<<(raw_ostream &O, const Function *F) { - return O << F->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { return O << MF->getFunction()->getName() << "(MF)"; } -raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { - return O << BB->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { return O << MBB->getBasicBlock()->getName() << "(MB)"; } -raw_ostream& operator<<(raw_ostream &O, std::pair E) { - O << "("; - - if (E.first) - O << E.first; - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second; - else - O << "0"; - - return O << ")"; -} - raw_ostream& operator<<(raw_ostream &O, std::pair E) { O << "("; diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 868f483..30f0d2f 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -47,7 +47,7 @@ static cl::opt printStyle("print-region-style", cl::values( clEnumValN(Region::PrintNone, "none", "print no details"), clEnumValN(Region::PrintBB, "bb", - "print regions in detail with block_node_iterator"), + "print regions in detail with block_iterator"), clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"), clEnumValEnd)); @@ -246,22 +246,6 @@ void Region::verifyRegionNest() const { verifyRegion(); } -Region::block_node_iterator Region::block_node_begin() { - return GraphTraits >::nodes_begin(this); -} - -Region::block_node_iterator Region::block_node_end() { - return GraphTraits >::nodes_end(this); -} - -Region::const_block_node_iterator Region::block_node_begin() const { - return GraphTraits >::nodes_begin(this); -} - -Region::const_block_node_iterator Region::block_node_end() const { - return GraphTraits >::nodes_end(this); -} - Region::element_iterator Region::element_begin() { return GraphTraits::nodes_begin(this); } @@ -425,10 +409,8 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2 + 2); if (Style == PrintBB) { - for (const_block_node_iterator I = block_node_begin(), - E = block_node_end(); - I != E; ++I) - OS << **I << ", "; // TODO: remove the last "," + for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I) + OS << (*I)->getName() << ", "; // TODO: remove the last "," } else if (Style == PrintRN) { for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last ", @@ -445,9 +427,11 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2) << "} \n"; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void Region::dump() const { print(dbgs(), true, getDepth(), printStyle.getValue()); } +#endif void Region::clearNodeCache() { // Free the cached nodes. 
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index c97b5eb..9208fa2 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -195,10 +195,9 @@ public: virtual bool runOnRegion(Region *R, RGPassManager &RGM) { Out << Banner; - for (Region::block_node_iterator I = R->block_node_begin(), - E = R->block_node_end(); + for (Region::block_iterator I = R->block_begin(), E = R->block_end(); I != E; ++I) - (*I)->getEntry()->print(Out); + (*I)->print(Out); return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index a654648..e3189ec 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -73,7 +73,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" @@ -105,6 +105,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, "derived loop"), cl::init(100)); +// FIXME: Enable this with XDEBUG when the test suite is clean. +static cl::opt +VerifySCEV("verify-scev", + cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); + INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -122,10 +127,12 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif void SCEV::print(raw_ostream &OS) const { switch (getSCEVType()) { @@ -2580,7 +2587,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, } const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { - // If we have TargetData, we can bypass creating a target-independent + // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) @@ -2606,7 +2613,7 @@ const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, unsigned FieldNo) { - // If we have TargetData, we can bypass creating a target-independent + // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) @@ -2671,7 +2678,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - // If we have a TargetData, use it! + // If we have a DataLayout, use it! if (TD) return TD->getTypeSizeInBits(Ty); @@ -2679,7 +2686,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { if (Ty->isIntegerTy()) return Ty->getPrimitiveSizeInBits(); - // The only other support type is pointer. Without TargetData, conservatively + // The only other support type is pointer. Without DataLayout, conservatively // assume pointers are 64-bit. 
   assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
   return 64;
@@ -2699,7 +2706,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
   if (TD) return TD->getIntPtrType(getContext());
 
-  // Without TargetData, conservatively assume pointers are 64-bit.
+  // Without DataLayout, conservatively assume pointers are 64-bit.
   return Type::getInt64Ty(getContext());
 }
 
@@ -3978,8 +3985,11 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
 
   ConstantInt *Result = MulC->getValue();
 
-  // Guard against huge trip counts.
-  if (!Result || Result->getValue().getActiveBits() > 32)
+  // Guard against huge trip counts (this requires checking
+  // for zero to handle the case where the trip count == -1 and the
+  // addition wraps).
+  if (!Result || Result->getValue().getActiveBits() > 32 ||
+      Result->getValue().getActiveBits() == 0)
     return 1;
 
   return (unsigned)Result->getZExtValue();
@@ -4749,7 +4759,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
 /// reason, return null.
 static Constant *EvaluateExpression(Value *V, const Loop *L,
                                     DenseMap<Instruction *, Constant *> &Vals,
-                                    const TargetData *TD,
+                                    const DataLayout *TD,
                                     const TargetLibraryInfo *TLI) {
   // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -6141,7 +6151,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       return CmpInst::isTrueWhenEqual(Pred);
     if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
       if (FoundLHS == FoundRHS)
-        return CmpInst::isFalseWhenEqual(Pred);
+        return CmpInst::isFalseWhenEqual(FoundPred);
 
   // Check to see if we can make the LHS or RHS match.
   if (LHS == FoundRHS || RHS == FoundLHS) {
@@ -6588,7 +6598,7 @@ ScalarEvolution::ScalarEvolution()
 bool ScalarEvolution::runOnFunction(Function &F) {
   this->F = &F;
   LI = &getAnalysis<LoopInfo>();
-  TD = getAnalysisIfAvailable<TargetData>();
+  TD = getAnalysisIfAvailable<DataLayout>();
   TLI = &getAnalysis<TargetLibraryInfo>();
   DT = &getAnalysis<DominatorTree>();
   return false;
@@ -6930,3 +6940,87 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
 }
+
+typedef DenseMap<const Loop *, std::string> VerifyMap;
+
+/// replaceSubString - Replaces all occurrences of From in Str with To.
+static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
+  size_t Pos = 0;
+  while ((Pos = Str.find(From, Pos)) != std::string::npos) {
+    Str.replace(Pos, From.size(), To.data(), To.size());
+    Pos += To.size();
+  }
+}
+
+/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
+static void
+getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
+  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
+    getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
+
+    std::string &S = Map[L];
+    if (S.empty()) {
+      raw_string_ostream OS(S);
+      SE.getBackedgeTakenCount(L)->print(OS);
+
+      // false and 0 are semantically equivalent. This can happen in dead loops.
+      replaceSubString(OS.str(), "false", "0");
+      // Remove wrap flags, their use in SCEV is highly fragile.
+      // FIXME: Remove this when SCEV gets smarter about them.
+      replaceSubString(OS.str(), "<nw>", "");
+      replaceSubString(OS.str(), "<nsw>", "");
+      replaceSubString(OS.str(), "<nuw>", "");
+    }
+  }
+}
+
+void ScalarEvolution::verifyAnalysis() const {
+  if (!VerifySCEV)
+    return;
+
+  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+  // Gather stringified backedge taken counts for all loops using SCEV's caches.
+ // FIXME: It would be much better to store actual values instead of strings, + // but SCEV pointers will change if we drop the caches. + VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); + + // Gather stringified backedge taken counts for all loops without using + // SCEV's caches. + SE.releaseMemory(); + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE); + + // Now compare whether they're the same with and without caches. This allows + // verifying that no pass changed the cache. + assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && + "New loops suddenly appeared!"); + + for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), + OldE = BackedgeDumpsOld.end(), + NewI = BackedgeDumpsNew.begin(); + OldI != OldE; ++OldI, ++NewI) { + assert(OldI->first == NewI->first && "Loop order changed!"); + + // Compare the stringified SCEVs. We don't care if undef backedgetaken count + // changes. + // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This + // means that a pass is buggy or SCEV has to learn a new pattern but is + // usually not harmful. + if (OldI->second != NewI->second && + OldI->second.find("undef") == std::string::npos && + NewI->second.find("undef") == std::string::npos && + OldI->second != "***COULDNOTCOMPUTE***" && + NewI->second != "***COULDNOTCOMPUTE***") { + dbgs() << "SCEVValidator: SCEV for loop '" + << OldI->first->getHeader()->getName() + << "' changed from '" << OldI->second + << "' to '" << NewI->second << "'!\n"; + std::abort(); + } + } + + // TODO: Verify more things. +} diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 62710c5..111bfb4 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -18,7 +18,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/STLExtras.h" @@ -212,7 +212,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, - const TargetData *TD) { + const DataLayout *TD) { // Everything is divisible by one. if (Factor->isOne()) return true; @@ -253,7 +253,7 @@ static bool FactorOutConstant(const SCEV *&S, // of the given factor. if (const SCEVMulExpr *M = dyn_cast(S)) { if (TD) { - // With TargetData, the size is known. Check if there is a constant + // With DataLayout, the size is known. Check if there is a constant // operand which is a multiple of the given factor. If so, we can // factor it. const SCEVConstant *FC = cast(Factor); @@ -267,7 +267,7 @@ static bool FactorOutConstant(const SCEV *&S, return true; } } else { - // Without TargetData, check if Factor can be factored out of any of the + // Without DataLayout, check if Factor can be factored out of any of the // Mul's operands. If so, we can just remove it. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *SOp = M->getOperand(i); @@ -458,7 +458,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // An empty struct has no fields. if (STy->getNumElements() == 0) break; if (SE.TD) { - // With TargetData, field offsets are known. 
See if a constant offset + // With DataLayout, field offsets are known. See if a constant offset // falls within any of the struct fields. if (Ops.empty()) break; if (const SCEVConstant *C = dyn_cast(Ops[0])) @@ -477,7 +477,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } } else { - // Without TargetData, just check for an offsetof expression of the + // Without DataLayout, just check for an offsetof expression of the // appropriate struct type. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (const SCEVUnknown *U = dyn_cast(Ops[i])) { @@ -1618,6 +1618,17 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PEnd = Phis.end(); PIter != PEnd; ++PIter) { PHINode *Phi = *PIter; + // Fold constant phis. They may be congruent to other constant phis and + // would confuse the logic below that expects proper IVs. + if (Value *V = Phi->hasConstantValue()) { + Phi->replaceAllUsesWith(V); + DeadInsts.push_back(Phi); + ++NumElim; + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); + continue; + } + if (!SE.isSCEVable(Phi->getType())) continue; diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index ff5010b..22da857 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -43,9 +43,11 @@ void Trace::print(raw_ostream &O) const { O << "; Trace parent function: \n" << *F; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dump - Debugger convenience method; writes trace to standard error /// output stream. /// void Trace::dump() const { print(dbgs()); } +#endif diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index cea34e1..3beb373 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -22,7 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" @@ -36,7 +36,7 @@ const unsigned MaxDepth = 6; /// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if /// unknown returns 0). For vector types, returns the element type's bitwidth. 
-static unsigned getBitWidth(Type *Ty, const TargetData *TD) { +static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; assert(isa(Ty) && "Expected a pointer type!"); @@ -46,7 +46,7 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const TargetData *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth) { if (!Add) { if (ConstantInt *CLHS = dyn_cast(Op0)) { // We know that the top bits of C-X are clear if X contains less bits @@ -132,7 +132,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const TargetData *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth) { unsigned BitWidth = KnownZero.getBitWidth(); ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); @@ -226,7 +226,7 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const TargetData *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -308,11 +308,20 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } if (Argument *A = dyn_cast(V)) { - // Get alignment information off byval arguments if specified in the IR. - if (A->hasByValAttr()) - if (unsigned Align = A->getParamAlignment()) - KnownZero = APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + unsigned Align = 0; + + if (A->hasByValAttr()) { + // Get alignment information off byval arguments if specified in the IR. + Align = A->getParamAlignment(); + } else if (TD && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = TD->getABITypeAlignment(EltTy); + } + + if (Align) + KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); return; } @@ -420,15 +429,13 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::ZExt: case Instruction::Trunc: { Type *SrcTy = I->getOperand(0)->getType(); - + unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - if (SrcTy->isPointerTy()) - SrcBitWidth = TD->getTypeSizeInBits(SrcTy); - else - SrcBitWidth = SrcTy->getScalarSizeInBits(); - + SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + + assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); @@ -778,7 +785,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, /// ComputeSignBit - Determine whether the sign bit is known to be zero or /// one. Convenience wrapper around ComputeMaskedBits. 
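// (Annotation, not part of the patch: the sret change above feeds pointer
// alignment into known-bits queries. A worked example, assuming an sret
// pointee type whose ABI alignment is 8 bytes:
//   Align = 8;                           // TD->getABITypeAlignment(EltTy)
//   CountTrailingZeros_32(Align)         // == 3
//   KnownZero = APInt::getLowBitsSet(BitWidth, 3);  // low 3 address bits are 0
// )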
void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const TargetData *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType(), TD); if (!BitWidth) { KnownZero = false; @@ -796,7 +803,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, +bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, unsigned Depth) { if (Constant *C = dyn_cast(V)) { if (C->isNullValue()) @@ -859,7 +866,7 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, /// when defined. For vectors return true if every element is known to be /// non-zero when defined. Supports values with integer or pointer type and /// vectors of integers. -bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { +bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (Constant *C = dyn_cast(V)) { if (C->isNullValue()) return false; @@ -986,7 +993,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, - const TargetData *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); @@ -1003,10 +1010,10 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// /// 'Op' must have a scalar integer type. /// -unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, +unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, unsigned Depth) { assert((TD || V->getType()->isIntOrIntVectorTy()) && - "ComputeNumSignBits requires a TargetData object to operate " + "ComputeNumSignBits requires a DataLayout object to operate " "on non-integer values!"); Type *Ty = V->getType(); unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) : @@ -1582,7 +1589,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const TargetData &TD) { + const DataLayout &TD) { Operator *PtrOp = dyn_cast(Ptr); if (PtrOp == 0 || Ptr->getType()->isVectorTy()) return Ptr; @@ -1614,7 +1621,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, // right. 
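// (Annotation, not part of the patch: the change just below is
// behavior-preserving. Both forms sign-extend the low PtrSize bits of Offset
// to 64 bits; for PtrSize == 32, an Offset holding 0x00000000FFFFFFFF becomes
// -1 either way. SignExtend64(Offset, PtrSize) states that intent directly
// instead of encoding it in a shift pair.)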
unsigned PtrSize = TD.getPointerSizeInBits(); if (PtrSize < 64) - Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + Offset = SignExtend64(Offset, PtrSize); return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); } @@ -1768,7 +1775,7 @@ uint64_t llvm::GetStringLength(Value *V) { } Value * -llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { +llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -1799,7 +1806,7 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, - const TargetData *TD, + const DataLayout *TD, unsigned MaxLookup) { SmallPtrSet Visited; SmallVector Worklist; @@ -1844,7 +1851,7 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { } bool llvm::isSafeToSpeculativelyExecute(const Value *V, - const TargetData *TD) { + const DataLayout *TD) { const Operator *Inst = dyn_cast(V); if (!Inst) return false; diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index 55684f7..639f5ac 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -66,7 +66,7 @@ namespace llvm { fmag[1] = '\n'; } - bool checkSignature() { + bool checkSignature() const { return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2); } }; diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 5cfc810..5052495 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -79,7 +79,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) } // Cast archive member header - ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; + const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At; At += sizeof(ArchiveMemberHeader); int flags = 0; @@ -196,7 +196,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) /* FALL THROUGH */ default: - char* slash = (char*) memchr(Hdr->name, '/', 16); + const char* slash = (const char*) memchr(Hdr->name, '/', 16); if (slash == 0) slash = Hdr->name + 16; pathname.assign(Hdr->name, slash - Hdr->name); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 481733d..a60e4aa 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -456,11 +456,12 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); KEYWORD(linker_private_weak); - KEYWORD(linker_private_weak_def_auto); + KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility. 
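// (Annotation, not part of the patch: the old keyword is still lexed so that
// existing .ll files keep parsing; the parser change further below maps both
// linker_private_weak_def_auto and the new linkonce_odr_auto_hide spelling
// to GlobalValue::LinkOnceODRAutoHideLinkage.)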
KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); + KEYWORD(linkonce_odr_auto_hide); KEYWORD(weak); KEYWORD(weak_odr); KEYWORD(appending); @@ -509,6 +510,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(asm); KEYWORD(sideeffect); KEYWORD(alignstack); + KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(ccc); @@ -523,6 +525,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(msp430_intrcc); KEYWORD(ptx_kernel); KEYWORD(ptx_device); + KEYWORD(spir_kernel); + KEYWORD(spir_func); + KEYWORD(intel_ocl_bicc); KEYWORD(cc); KEYWORD(c); @@ -553,7 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); - KEYWORD(ia_nsdialect); + KEYWORD(minsize); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 0ff8edd..b24291f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -184,12 +184,13 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_private: // OptionalLinkage case lltok::kw_linker_private: // OptionalLinkage case lltok::kw_linker_private_weak: // OptionalLinkage - case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage + case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat. case lltok::kw_internal: // OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage case lltok::kw_linkonce: // OptionalLinkage case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage case lltok::kw_appending: // OptionalLinkage case lltok::kw_dllexport: // OptionalLinkage case lltok::kw_common: // OptionalLinkage @@ -576,8 +577,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::PrivateLinkage && Linkage != GlobalValue::LinkerPrivateLinkage && - Linkage != GlobalValue::LinkerPrivateWeakLinkage && - Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage) + Linkage != GlobalValue::LinkerPrivateWeakLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -779,7 +779,9 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, 0, Name); + GlobalValue::ExternalWeakLinkage, 0, Name, + 0, GlobalVariable::NotThreadLocal, + PTy->getAddressSpace()); ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -916,59 +918,50 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { /// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind /// indicates what kind of attribute list this is: 0: function arg, 1: result, /// 2: function attr. -bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { - Attrs = Attribute::None; +bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) { LocTy AttrLoc = Lex.getLoc(); + bool HaveError = false; + + B.clear(); while (1) { - switch (Lex.getKind()) { + lltok::Kind Token = Lex.getKind(); + switch (Token) { default: // End of attributes. - if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly)) - return Error(AttrLoc, "invalid use of function-only attribute"); - - // As a hack, we allow "align 2" on functions as a synonym for - // "alignstack 2". 
- if (AttrKind == 2 && - (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment))) - return Error(AttrLoc, "invalid use of attribute on a function"); - - if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly)) - return Error(AttrLoc, "invalid use of parameter-only attribute"); - - return false; - case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; - case lltok::kw_signext: Attrs |= Attribute::SExt; break; - case lltok::kw_inreg: Attrs |= Attribute::InReg; break; - case lltok::kw_sret: Attrs |= Attribute::StructRet; break; - case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break; - case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break; - case lltok::kw_byval: Attrs |= Attribute::ByVal; break; - case lltok::kw_nest: Attrs |= Attribute::Nest; break; - - case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break; - case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break; - case lltok::kw_uwtable: Attrs |= Attribute::UWTable; break; - case lltok::kw_returns_twice: Attrs |= Attribute::ReturnsTwice; break; - case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; - case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; - case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; - case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break; - case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; - case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; - case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break; - case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break; - case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break; - case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; - case lltok::kw_naked: Attrs |= Attribute::Naked; break; - case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; - case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; - case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break; + return HaveError; + case lltok::kw_zeroext: B.addAttribute(Attributes::ZExt); break; + case lltok::kw_signext: B.addAttribute(Attributes::SExt); break; + case lltok::kw_inreg: B.addAttribute(Attributes::InReg); break; + case lltok::kw_sret: B.addAttribute(Attributes::StructRet); break; + case lltok::kw_noalias: B.addAttribute(Attributes::NoAlias); break; + case lltok::kw_nocapture: B.addAttribute(Attributes::NoCapture); break; + case lltok::kw_byval: B.addAttribute(Attributes::ByVal); break; + case lltok::kw_nest: B.addAttribute(Attributes::Nest); break; + + case lltok::kw_noreturn: B.addAttribute(Attributes::NoReturn); break; + case lltok::kw_nounwind: B.addAttribute(Attributes::NoUnwind); break; + case lltok::kw_uwtable: B.addAttribute(Attributes::UWTable); break; + case lltok::kw_returns_twice: B.addAttribute(Attributes::ReturnsTwice); break; + case lltok::kw_noinline: B.addAttribute(Attributes::NoInline); break; + case lltok::kw_readnone: B.addAttribute(Attributes::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attributes::ReadOnly); break; + case lltok::kw_inlinehint: B.addAttribute(Attributes::InlineHint); break; + case lltok::kw_alwaysinline: B.addAttribute(Attributes::AlwaysInline); break; + case lltok::kw_optsize: B.addAttribute(Attributes::OptimizeForSize); break; + case lltok::kw_ssp: B.addAttribute(Attributes::StackProtect); break; + case lltok::kw_sspreq: B.addAttribute(Attributes::StackProtectReq); break; + case lltok::kw_noredzone: B.addAttribute(Attributes::NoRedZone); break; + case 
lltok::kw_noimplicitfloat: B.addAttribute(Attributes::NoImplicitFloat); break; + case lltok::kw_naked: B.addAttribute(Attributes::Naked); break; + case lltok::kw_nonlazybind: B.addAttribute(Attributes::NonLazyBind); break; + case lltok::kw_address_safety: B.addAttribute(Attributes::AddressSafety); break; + case lltok::kw_minsize: B.addAttribute(Attributes::MinSize); break; case lltok::kw_alignstack: { unsigned Alignment; if (ParseOptionalStackAlignment(Alignment)) return true; - Attrs |= Attribute::constructStackAlignmentFromInt(Alignment); + B.addStackAlignmentAttr(Alignment); continue; } @@ -976,11 +969,57 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) return true; - Attrs |= Attribute::constructAlignmentFromInt(Alignment); + B.addAlignmentAttr(Alignment); continue; } } + + // Perform some error checking. + switch (Token) { + default: + if (AttrKind == 2) + HaveError |= Error(AttrLoc, "invalid use of attribute on a function"); + break; + case lltok::kw_align: + // As a hack, we allow "align 2" on functions as a synonym for + // "alignstack 2". + break; + + // Parameter Only: + case lltok::kw_sret: + case lltok::kw_nocapture: + case lltok::kw_byval: + case lltok::kw_nest: + if (AttrKind != 0) + HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute"); + break; + + // Function Only: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_noinline: + case lltok::kw_alwaysinline: + case lltok::kw_optsize: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_noredzone: + case lltok::kw_noimplicitfloat: + case lltok::kw_naked: + case lltok::kw_inlinehint: + case lltok::kw_alignstack: + case lltok::kw_uwtable: + case lltok::kw_nonlazybind: + case lltok::kw_returns_twice: + case lltok::kw_address_safety: + case lltok::kw_minsize: + if (AttrKind != 2) + HaveError |= Error(AttrLoc, "invalid use of function-only attribute"); + break; + } + Lex.Lex(); } } @@ -990,12 +1029,12 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { /// ::= 'private' /// ::= 'linker_private' /// ::= 'linker_private_weak' -/// ::= 'linker_private_weak_def_auto' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' /// ::= 'linkonce' /// ::= 'linkonce_odr' +/// ::= 'linkonce_odr_auto_hide' /// ::= 'available_externally' /// ::= 'appending' /// ::= 'dllexport' @@ -1012,14 +1051,15 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_linker_private_weak: Res = GlobalValue::LinkerPrivateWeakLinkage; break; - case lltok::kw_linker_private_weak_def_auto: - Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage; - break; case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break; + case lltok::kw_linkonce_odr_auto_hide: + case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat. 
+ Res = GlobalValue::LinkOnceODRAutoHideLinkage; + break; case lltok::kw_available_externally: Res = GlobalValue::AvailableExternallyLinkage; break; @@ -1056,6 +1096,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= /*empty*/ /// ::= 'ccc' /// ::= 'fastcc' +/// ::= 'kw_intel_ocl_bicc' /// ::= 'coldcc' /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' @@ -1066,6 +1107,8 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'msp430_intrcc' /// ::= 'ptx_kernel' /// ::= 'ptx_device' +/// ::= 'spir_func' +/// ::= 'spir_kernel' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { @@ -1083,6 +1126,9 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break; case lltok::kw_ptx_kernel: CC = CallingConv::PTX_Kernel; break; case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break; + case lltok::kw_spir_kernel: CC = CallingConv::SPIR_KERNEL; break; + case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; + case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); @@ -1395,16 +1441,16 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, // Parse the argument. LocTy ArgLoc; Type *ArgTy = 0; - Attributes ArgAttrs1; - Attributes ArgAttrs2; + AttrBuilder ArgAttrs; Value *V; if (ParseType(ArgTy, ArgLoc)) return true; // Otherwise, handle normal operands. - if (ParseOptionalAttrs(ArgAttrs1, 0) || ParseValue(ArgTy, V, PFS)) + if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS)) return true; - ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2)); + ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(), + ArgAttrs))); } Lex.Lex(); // Lex the ')'. @@ -1436,7 +1482,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, } else { LocTy TypeLoc = Lex.getLoc(); Type *ArgTy = 0; - Attributes Attrs; + AttrBuilder Attrs; std::string Name; if (ParseType(ArgTy) || @@ -1453,7 +1499,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, if (!FunctionType::isValidArgumentType(ArgTy)) return Error(TypeLoc, "invalid type for function argument"); - ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, + Attributes::get(ArgTy->getContext(), + Attrs), Name)); while (EatIfPresent(lltok::comma)) { // Handle ... at end of arg list. @@ -1479,7 +1527,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, if (!ArgTy->isFirstClassType()) return Error(TypeLoc, "invalid type for function argument"); - ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, + Attributes::get(ArgTy->getContext(), Attrs), + Name)); } } @@ -1503,7 +1553,7 @@ bool LLParser::ParseFunctionType(Type *&Result) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) return Error(ArgList[i].Loc, "argument name invalid in function type"); - if (ArgList[i].Attrs) + if (ArgList[i].Attrs.hasAttributes()) return Error(ArgList[i].Loc, "argument attributes invalid in function type"); } @@ -2069,16 +2119,18 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_asm: { // ValID ::= 'asm' SideEffect? AlignStack? 
STRINGCONSTANT ',' STRINGCONSTANT - bool HasSideEffect, AlignStack; + bool HasSideEffect, AlignStack, AsmDialect; Lex.Lex(); if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || ParseOptionalToken(lltok::kw_alignstack, AlignStack) || + ParseOptionalToken(lltok::kw_inteldialect, AsmDialect) || ParseStringConstant(ID.StrVal) || ParseToken(lltok::comma, "expected comma in inline asm expression") || ParseToken(lltok::StringConstant, "expected constraint string")) return true; ID.StrVal2 = Lex.getStrVal(); - ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1); + ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1) | + (unsigned(AsmDialect)<<2); ID.Kind = ValID::t_InlineAsm; return false; } @@ -2495,7 +2547,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, PTy ? dyn_cast(PTy->getElementType()) : 0; if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); - V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1); + V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, + (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2))); return false; } case ValID::t_MDNode: @@ -2630,7 +2683,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { unsigned Linkage; unsigned Visibility; - Attributes RetAttrs; + AttrBuilder RetAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc = Lex.getLoc(); @@ -2653,11 +2706,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: case GlobalValue::InternalLinkage: case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::DLLExportLinkage: @@ -2694,7 +2747,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { SmallVector ArgList; bool isVarArg; - Attributes FuncAttrs; + AttrBuilder FuncAttrs; std::string Section; unsigned Alignment; std::string GC; @@ -2713,9 +2766,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { return true; // If the alignment was parsed as an attribute, move to the alignment field. - if (FuncAttrs & Attribute::Alignment) { - Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs); - FuncAttrs &= ~Attribute::Alignment; + if (FuncAttrs.hasAlignmentAttr()) { + Alignment = FuncAttrs.getAlignment(); + FuncAttrs.removeAttribute(Attributes::Alignment); } // Okay, if we got here, the function is syntactically valid. 
Convert types @@ -2723,21 +2776,28 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { std::vector ParamTypeList; SmallVector Attrs; - if (RetAttrs != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); + if (RetAttrs.hasAttributes()) + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(RetType->getContext(), + RetAttrs))); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { ParamTypeList.push_back(ArgList[i].Ty); - if (ArgList[i].Attrs != Attribute::None) + if (ArgList[i].Attrs.hasAttributes()) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); } - if (FuncAttrs != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs)); + if (FuncAttrs.hasAttributes()) + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attributes::get(RetType->getContext(), + FuncAttrs))); - AttrListPtr PAL = AttrListPtr::get(Attrs); + AttrListPtr PAL = AttrListPtr::get(Context, Attrs); - if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy()) + if (PAL.getParamAttributes(1).hasAttribute(Attributes::StructRet) && + !RetType->isVoidTy()) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); FunctionType *FT = @@ -2752,6 +2812,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { Fn = M->getFunction(FunctionName); + if (!Fn) + return Error(FRVI->second.second, "invalid forward reference to " + "function as global value!"); if (Fn->getType() != PFT) return Error(FRVI->second.second, "invalid forward reference to " "function '" + FunctionName + "' with wrong type!"); @@ -3205,7 +3268,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { /// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); - Attributes RetAttrs, FnAttrs; + AttrBuilder RetAttrs, FnAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3250,8 +3313,11 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { // Set up the Attributes for the function. 
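// (Annotation, not part of the patch: the recurring pattern introduced by
// this change is build-then-freeze. A minimal sketch, assuming an
// LLVMContext Ctx is in scope:
//   AttrBuilder B;                          // mutable scratch set
//   B.addAttribute(Attributes::NoUnwind);
//   Attributes A = Attributes::get(Ctx, B); // immutable, uniqued in Ctx
//   Attrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, A));
// replacing the old raw-bitmask arithmetic on Attributes.)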
SmallVector Attrs; - if (RetAttrs != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); + if (RetAttrs.hasAttributes()) + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(Callee->getContext(), + RetAttrs))); SmallVector Args; @@ -3271,18 +3337,21 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs != Attribute::None) + if (ArgList[i].Attrs.hasAttributes()) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); } if (I != E) return Error(CallLoc, "not enough parameters specified for call"); - if (FnAttrs != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); + if (FnAttrs.hasAttributes()) + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attributes::get(Callee->getContext(), + FnAttrs))); // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Attrs); + AttrListPtr PAL = AttrListPtr::get(Context, Attrs); InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args); II->setCallingConv(CC); @@ -3604,7 +3673,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { /// ParameterList OptionalAttrs bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { - Attributes RetAttrs, FnAttrs; + AttrBuilder RetAttrs, FnAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3646,8 +3715,11 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // Set up the Attributes for the function. SmallVector Attrs; - if (RetAttrs != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); + if (RetAttrs.hasAttributes()) + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(Callee->getContext(), + RetAttrs))); SmallVector Args; @@ -3667,18 +3739,21 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs != Attribute::None) + if (ArgList[i].Attrs.hasAttributes()) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); } if (I != E) return Error(CallLoc, "not enough parameters specified for call"); - if (FnAttrs != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); + if (FnAttrs.hasAttributes()) + Attrs.push_back( + AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attributes::get(Callee->getContext(), + FnAttrs))); // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Attrs); + AttrListPtr PAL = AttrListPtr::get(Context, Attrs); CallInst *CI = CallInst::Create(Callee, Args); CI->setTailCall(isTail); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 257c726..c6bbdb2 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -175,7 +175,7 @@ namespace llvm { bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalAddrSpace(unsigned &AddrSpace); - bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind); + bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); bool ParseOptionalLinkage(unsigned &Linkage) { bool HasLinkage; return 
ParseOptionalLinkage(Linkage, HasLinkage); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 0b0b980..036686d 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -37,8 +37,10 @@ namespace lltok { kw_global, kw_constant, kw_private, kw_linker_private, kw_linker_private_weak, - kw_linker_private_weak_def_auto, kw_internal, - kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, + kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility. + kw_internal, + kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide, + kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_unnamed_addr, @@ -70,14 +72,17 @@ namespace lltok { kw_asm, kw_sideeffect, kw_alignstack, + kw_inteldialect, kw_gc, kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, + kw_intel_ocl_bicc, kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_msp430_intrcc, kw_ptx_kernel, kw_ptx_device, + kw_spir_kernel, kw_spir_func, kw_signext, kw_zeroext, @@ -105,7 +110,7 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, - kw_ia_nsdialect, + kw_minsize, kw_type, kw_opaque, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4ffee38..4ec9da1 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -52,6 +52,8 @@ void BitcodeReader::FreeState() { std::vector().swap(FunctionsWithBodies); DeferredFunctionInfo.clear(); MDKindMap.clear(); + + assert(BlockAddrFwdRefs.empty() && "Unresolved blockaddress fwd references"); } //===----------------------------------------------------------------------===// @@ -89,7 +91,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; case 14: return GlobalValue::LinkerPrivateWeakLinkage; - case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage; + case 15: return GlobalValue::LinkOnceODRAutoHideLinkage; } } @@ -197,7 +199,7 @@ namespace { /// @brief A class for maintaining the slot number definition /// as a placeholder for the actual definition for forward constants defs. class ConstantPlaceHolder : public ConstantExpr { - void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT + void operator=(const ConstantPlaceHolder &) LLVM_DELETED_FUNCTION; public: // allocate space for exactly one operand void *operator new(size_t s) { @@ -209,7 +211,6 @@ namespace { } /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. 
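// (Annotation, not part of the patch: the commented-out one-argument classof
// overload removed below is dead weight; isa<> and friends only need the
// classof(const Value *) predicate that follows. This is an editorial
// inference from the deletion, not stated in the commit.)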
- //static inline bool classof(const ConstantPlaceHolder *) { return true; } static bool classof(const Value *V) { return isa(V) && cast(V)->getOpcode() == Instruction::UserOp1; @@ -475,17 +476,18 @@ bool BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Attributes ReconstitutedAttr = - Attribute::decodeLLVMAttributesForBitcode(Record[i+1]); + Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]); Record[i+1] = ReconstitutedAttr.Raw(); } for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - if (Attributes(Record[i+1]) != Attribute::None) + AttrBuilder B(Record[i+1]); + if (B.hasAttributes()) Attrs.push_back(AttributeWithIndex::get(Record[i], - Attributes(Record[i+1]))); + Attributes::get(Context, B))); } - MAttributes.push_back(AttrListPtr::get(Attrs)); + MAttributes.push_back(AttrListPtr::get(Context, Attrs)); Attrs.clear(); break; } @@ -889,9 +891,9 @@ bool BitcodeReader::ParseMetadata() { } } -/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in +/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in /// the LSB for dense VBR encoding. -static uint64_t DecodeSignRotatedValue(uint64_t V) { +uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) @@ -941,7 +943,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { static APInt ReadWideAPInt(ArrayRef Vals, unsigned TypeBits) { SmallVector Words(Vals.size()); std::transform(Vals.begin(), Vals.end(), Words.begin(), - DecodeSignRotatedValue); + BitcodeReader::decodeSignRotatedValue); return APInt(TypeBits, Words); } @@ -995,7 +997,7 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid CST_INTEGER record"); - V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0])); + V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) @@ -1245,7 +1247,9 @@ bool BitcodeReader::ParseConstants() { V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } - case bitc::CST_CODE_INLINEASM: { + // This maintains backward compatibility, pre-asm dialect keywords. + // FIXME: Remove with the 4.0 release. + case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) return Error("Invalid INLINEASM record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; @@ -1266,6 +1270,31 @@ bool BitcodeReader::ParseConstants() { AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } + // This version adds support for the asm dialect keywords (e.g., + // inteldialect). 
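// (Annotation, not part of the patch: in the new record form, Record[0]
// packs three fields, matching the writer's
//   unsigned(HasSideEffects) | unsigned(IsAlignStack) << 1 |
//   unsigned(AsmDialect) << 2
// so a sideeffect, inteldialect asm without alignstack encodes Record[0] as
// 0b101 == 5: bit 0 set, bit 1 clear, bit 2 set.)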
+ case bitc::CST_CODE_INLINEASM: { + if (Record.size() < 2) return Error("Invalid INLINEASM record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[0] & 1; + bool IsAlignStack = (Record[0] >> 1) & 1; + unsigned AsmDialect = Record[0] >> 2; + unsigned AsmStrSize = Record[1]; + if (2+AsmStrSize >= Record.size()) + return Error("Invalid INLINEASM record"); + unsigned ConstStrSize = Record[2+AsmStrSize]; + if (3+AsmStrSize+ConstStrSize > Record.size()) + return Error("Invalid INLINEASM record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[2+i]; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[3+AsmStrSize+i]; + PointerType *PTy = cast(CurTy); + V = InlineAsm::get(cast(PTy->getElementType()), + AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + InlineAsm::AsmDialect(AsmDialect)); + break; + } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record"); Type *FnTy = getTypeByID(Record[0]); @@ -1273,13 +1302,27 @@ bool BitcodeReader::ParseConstants() { Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record"); - - GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(), - Type::getInt8Ty(Context), + + // If the function is already parsed we can insert the block address right + // away. + if (!Fn->empty()) { + Function::iterator BBI = Fn->begin(), BBE = Fn->end(); + for (size_t I = 0, E = Record[2]; I != E; ++I) { + if (BBI == BBE) + return Error("Invalid blockaddress block #"); + ++BBI; + } + V = BlockAddress::get(Fn, BBI); + } else { + // Otherwise insert a placeholder and remember it so it can be inserted + // when the function is parsed. + GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(), + Type::getInt8Ty(Context), false, GlobalValue::InternalLinkage, - 0, ""); - BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef)); - V = FwdRef; + 0, ""); + BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef)); + V = FwdRef; + } break; } } @@ -1481,13 +1524,22 @@ bool BitcodeReader::ParseModule(bool Resume) { // Read a record. switch (Stream.ReadRecord(Code, Record)) { default: break; // Default behavior, ignore unknown content. - case bitc::MODULE_CODE_VERSION: // VERSION: [version#] + case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) return Error("Malformed MODULE_CODE_VERSION"); - // Only version #0 is supported so far. - if (Record[0] != 0) - return Error("Unknown bitstream version!"); + // Only version #0 and #1 are supported so far. + unsigned module_version = Record[0]; + switch (module_version) { + default: return Error("Unknown bitstream version!"); + case 0: + UseRelativeIDs = false; + break; + case 1: + UseRelativeIDs = true; + break; + } break; + } case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) @@ -1754,13 +1806,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { // Read a record. switch (Stream.ReadRecord(Code, Record)) { default: break; // Default behavior, ignore unknown content. - case bitc::MODULE_CODE_VERSION: // VERSION: [version#] - if (Record.size() < 1) - return Error("Malformed MODULE_CODE_VERSION"); - // Only version #0 is supported so far. 
- if (Record[0] != 0) - return Error("Unknown bitstream version!"); - break; case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) @@ -1973,7 +2018,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - getValue(Record, OpNum, LHS->getType(), RHS) || + popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) return Error("Invalid BINOP record"); @@ -2088,8 +2133,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || - getValue(Record, OpNum, TrueVal->getType(), FalseVal) || - getValue(Record, OpNum, Type::getInt1Ty(Context), Cond)) + popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || + popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) return Error("Invalid SELECT record"); I = SelectInst::Create(Cond, TrueVal, FalseVal); @@ -2103,7 +2148,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || - getValue(Record, OpNum, TrueVal->getType(), FalseVal) || + popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) return Error("Invalid SELECT record"); @@ -2128,7 +2173,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) + popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) return Error("Invalid EXTRACTELT record"); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); @@ -2139,9 +2184,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec, *Elt, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) || - getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) + popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) return Error("Invalid INSERTELT record"); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); @@ -2152,7 +2197,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || - getValue(Record, OpNum, Vec1->getType(), Vec2)) + popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) return Error("Invalid SHUFFLEVEC record"); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) @@ -2172,7 +2217,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - getValue(Record, OpNum, LHS->getType(), RHS) || + popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 != Record.size()) return Error("Invalid CMP record"); @@ -2217,7 +2262,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); - Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context)); + Value *Cond = getValue(Record, 2, NextValueNo, + Type::getInt1Ty(Context)); if (FalseDest == 0 || Cond == 0) return Error("Invalid BR record"); I = BranchInst::Create(TrueDest, 
FalseDest, Cond); @@ -2233,7 +2279,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); - Value *Cond = getFnValueByID(Record[2], OpTy); + Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (OpTy == 0 || Cond == 0 || Default == 0) return Error("Invalid SWITCH record"); @@ -2288,7 +2334,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 3 || (Record.size() & 1) == 0) return Error("Invalid SWITCH record"); Type *OpTy = getTypeByID(Record[0]); - Value *Cond = getFnValueByID(Record[1], OpTy); + Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (OpTy == 0 || Cond == 0 || Default == 0) return Error("Invalid SWITCH record"); @@ -2312,7 +2358,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 2) return Error("Invalid INDIRECTBR record"); Type *OpTy = getTypeByID(Record[0]); - Value *Address = getFnValueByID(Record[1], OpTy); + Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (OpTy == 0 || Address == 0) return Error("Invalid INDIRECTBR record"); unsigned NumDests = Record.size()-2; @@ -2354,7 +2400,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { - Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + Ops.push_back(getValue(Record, OpNum, NextValueNo, + FTy->getParamType(i))); if (Ops.back() == 0) return Error("Invalid INVOKE record"); } @@ -2401,7 +2448,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(PN); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { - Value *V = getFnValueByID(Record[1+i], Ty); + Value *V; + // With the new function encoding, it is possible that operands have + // negative IDs (for forward references). Use a signed VBR + // representation to keep the encoding small. 
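// (Annotation, not part of the patch: under UseRelativeIDs an operand is
// stored as InstNum - ValNo. A PHI may reference a value defined later in
// the function (across a backedge), e.g. InstNum == 10 referring to ID 12
// stores -2, so PHI operands take the signed, sign-rotated path via
// getValueSigned() while most other operands use the unsigned
// popValue()/getValue() helpers.)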
+ if (UseRelativeIDs) + V = getValueSigned(Record, 1+i, NextValueNo, Ty); + else + V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); if (!V || !BB) return Error("Invalid PHI record"); PN->addIncoming(V, BB); @@ -2499,7 +2553,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+2 != Record.size()) return Error("Invalid STORE record"); @@ -2513,7 +2567,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return Error("Invalid STOREATOMIC record"); @@ -2536,9 +2590,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Ptr, *Cmp, *New; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Cmp) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), New) || OpNum+3 != Record.size()) return Error("Invalid CMPXCHG record"); @@ -2556,7 +2610,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return Error("Invalid ATOMICRMW record"); @@ -2610,7 +2664,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else - Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + Args.push_back(getValue(Record, OpNum, NextValueNo, + FTy->getParamType(i))); if (Args.back() == 0) return Error("Invalid CALL record"); } @@ -2639,7 +2694,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 3) return Error("Invalid VAARG record"); Type *OpTy = getTypeByID(Record[0]); - Value *Op = getFnValueByID(Record[1], OpTy); + Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) return Error("Invalid VAARG record"); @@ -2837,7 +2892,7 @@ bool BitcodeReader::InitStream() { } bool BitcodeReader::InitStreamFromBuffer() { - const unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); if (Buffer->getBufferSize() & 3) { diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index e7c4e94..3d5c0eb 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -179,18 +179,27 @@ class BitcodeReader : public GVMaterializer { typedef std::pair BlockAddrRefTy; DenseMap > BlockAddrFwdRefs; + /// UseRelativeIDs - Indicates that we are using a new encoding for + /// instruction operands where most operands in the current + /// FUNCTION_BLOCK are encoded relative to the instruction number, + /// for a more compact encoding. Some instruction operands are not + /// relative to the instruction ID: basic block numbers, and types. 
+ /// Once the old style function blocks have been phased out, we would + /// not need this flag. + bool UseRelativeIDs; + public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false), ErrorString(0), ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false) { + SeenFirstFunctionBody(false), UseRelativeIDs(false) { } explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) : Context(C), TheModule(0), Buffer(0), BufferOwned(false), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), ErrorString(0), ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false) { + SeenFirstFunctionBody(false), UseRelativeIDs(false) { } ~BitcodeReader() { FreeState(); @@ -223,6 +232,9 @@ public: /// @brief Cheap mechanism to just extract module triple /// @returns true if an error occurred. bool ParseTriple(std::string &Triple); + + static uint64_t decodeSignRotatedValue(uint64_t V); + private: Type *getTypeByID(unsigned ID); Value *getFnValueByID(unsigned ID, Type *Ty) { @@ -247,6 +259,9 @@ private: unsigned InstNum, Value *&ResVal) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; if (ValNo < InstNum) { // If this is not a forward reference, just return the value we already // have. @@ -255,20 +270,54 @@ private: } else if (Slot == Record.size()) { return true; } - + unsigned TypeNo = (unsigned)Record[Slot++]; ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); return ResVal == 0; } - bool getValue(SmallVector &Record, unsigned &Slot, - Type *Ty, Value *&ResVal) { - if (Slot == Record.size()) return true; - unsigned ValNo = (unsigned)Record[Slot++]; - ResVal = getFnValueByID(ValNo, Ty); + + /// popValue - Read a value out of the specified record from slot 'Slot'. + /// Increment Slot past the number of slots used by the value in the record. + /// Return true if there is an error. + bool popValue(SmallVector &Record, unsigned &Slot, + unsigned InstNum, Type *Ty, Value *&ResVal) { + if (getValue(Record, Slot, InstNum, Ty, ResVal)) + return true; + // All values currently take a single record slot. + ++Slot; + return false; + } + + /// getValue -- Like popValue, but does not increment the Slot number. + bool getValue(SmallVector &Record, unsigned Slot, + unsigned InstNum, Type *Ty, Value *&ResVal) { + ResVal = getValue(Record, Slot, InstNum, Ty); return ResVal == 0; } - + /// getValue -- Version of getValue that returns ResVal directly, + /// or 0 if there is an error. + Value *getValue(SmallVector &Record, unsigned Slot, + unsigned InstNum, Type *Ty) { + if (Slot == Record.size()) return 0; + unsigned ValNo = (unsigned)Record[Slot]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo, Ty); + } + + /// getValueSigned -- Like getValue, but decodes signed VBRs. + Value *getValueSigned(SmallVector &Record, unsigned Slot, + unsigned InstNum, Type *Ty) { + if (Slot == Record.size()) return 0; + unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); + // Adjust the ValNo, if it was encoded relative to the InstNum. 
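// (Annotation, not part of the patch: popValue() consumes the record slot,
// getValue() only peeks at it, and getValueSigned() additionally undoes the
// sign rotation; all three apply the same InstNum - ValNo adjustment first,
// so relative and absolute encodings funnel into one lookup,
// getFnValueByID().)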
+ if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo, Ty); + } + bool ParseModule(bool Resume); bool ParseAttributeBlock(); bool ParseTypeTable(); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5b1725f..60c657a 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -41,8 +41,6 @@ EnablePreserveUseListOrdering("enable-bc-uselist-preserve", /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { - CurVersion = 0, - // VALUE_SYMTAB_BLOCK abbrev id's. VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, VST_ENTRY_7_ABBREV, @@ -177,7 +175,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE, for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { const AttributeWithIndex &PAWI = A.getSlot(i); Record.push_back(PAWI.Index); - Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs)); + Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs)); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); @@ -365,7 +363,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::AvailableExternallyLinkage: return 12; case GlobalValue::LinkerPrivateLinkage: return 13; case GlobalValue::LinkerPrivateWeakLinkage: return 14; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15; + case GlobalValue::LinkOnceODRAutoHideLinkage: return 15; } llvm_unreachable("Invalid linkage"); } @@ -722,16 +720,20 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { + if ((int64_t)V >= 0) + Vals.push_back(V << 1); + else + Vals.push_back((-V << 1) | 1); +} + static void EmitAPInt(SmallVectorImpl &Vals, unsigned &Code, unsigned &AbbrevToUse, const APInt &Val, bool EmitSizeForWideNumbers = false ) { if (Val.getBitWidth() <= 64) { uint64_t V = Val.getSExtValue(); - if ((int64_t)V >= 0) - Vals.push_back(V << 1); - else - Vals.push_back((-V << 1) | 1); + emitSignedInt64(Vals, V); Code = bitc::CST_CODE_INTEGER; AbbrevToUse = CONSTANTS_INTEGER_ABBREV; } else { @@ -747,11 +749,7 @@ static void EmitAPInt(SmallVectorImpl &Vals, const uint64_t *RawWords = Val.getRawData(); for (unsigned i = 0; i != NWords; ++i) { - int64_t V = RawWords[i]; - if (V >= 0) - Vals.push_back(V << 1); - else - Vals.push_back((-V << 1) | 1); + emitSignedInt64(Vals, RawWords[i]); } Code = bitc::CST_CODE_WIDE_INTEGER; } @@ -814,7 +812,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (const InlineAsm *IA = dyn_cast(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | - unsigned(IA->isAlignStack()) << 1); + unsigned(IA->isAlignStack()) << 1 | + unsigned(IA->getDialect()&1) << 2); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); @@ -1024,12 +1023,13 @@ static void WriteModuleConstants(const ValueEnumerator &VE, /// /// This function adds V's value ID to Vals. If the value ID is higher than the /// instruction ID, then it is a forward reference, and it also includes the -/// type ID. +/// type ID. The value ID that is written is encoded relative to the InstID. static bool PushValueAndType(const Value *V, unsigned InstID, SmallVector &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); - Vals.push_back(ValID); + // Make encoding relative to the InstID. 
+ Vals.push_back(InstID - ValID); if (ValID >= InstID) { Vals.push_back(VE.getTypeID(V->getType())); return true; @@ -1037,6 +1037,30 @@ static bool PushValueAndType(const Value *V, unsigned InstID, return false; } +/// pushValue - Like PushValueAndType, but where the type of the value is +/// omitted (perhaps it was already encoded in an earlier operand). +static void pushValue(const Value *V, unsigned InstID, + SmallVector &Vals, + ValueEnumerator &VE) { + unsigned ValID = VE.getValueID(V); + Vals.push_back(InstID - ValID); +} + +static void pushValue64(const Value *V, unsigned InstID, + SmallVector &Vals, + ValueEnumerator &VE) { + uint64_t ValID = VE.getValueID(V); + Vals.push_back(InstID - ValID); +} + +static void pushValueSigned(const Value *V, unsigned InstID, + SmallVector &Vals, + ValueEnumerator &VE) { + unsigned ValID = VE.getValueID(V); + int64_t diff = ((int32_t)InstID - (int32_t)ValID); + emitSignedInt64(Vals, diff); +} + /// WriteInstruction - Emit an instruction to the specified stream. static void WriteInstruction(const Instruction &I, unsigned InstID, ValueEnumerator &VE, BitstreamWriter &Stream, @@ -1057,7 +1081,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_BINOP; if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; - Vals.push_back(VE.getValueID(I.getOperand(1))); + pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode())); uint64_t Flags = GetOptimizationFlags(&I); if (Flags != 0) { @@ -1095,32 +1119,32 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Select: Code = bitc::FUNC_CODE_INST_VSELECT; PushValueAndType(I.getOperand(1), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(2))); + pushValue(I.getOperand(2), InstID, Vals, VE); PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; case Instruction::ExtractElement: Code = bitc::FUNC_CODE_INST_EXTRACTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); + pushValue(I.getOperand(1), InstID, Vals, VE); break; case Instruction::InsertElement: Code = bitc::FUNC_CODE_INST_INSERTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); - Vals.push_back(VE.getValueID(I.getOperand(2))); + pushValue(I.getOperand(1), InstID, Vals, VE); + pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ShuffleVector: Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); - Vals.push_back(VE.getValueID(I.getOperand(2))); + pushValue(I.getOperand(1), InstID, Vals, VE); + pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ICmp: case Instruction::FCmp: // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - Vals.push_back(VE.getValueID(I.getOperand(1))); + pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(cast(I).getPredicate()); break; @@ -1146,7 +1170,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); - Vals.push_back(VE.getValueID(II.getCondition())); + pushValue(II.getCondition(), InstID, Vals, VE); } } break; @@ -1163,7 +1187,7 @@ static void WriteInstruction(const Instruction 
&I, unsigned InstID, Vals64.push_back(SwitchRecordHeader); Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); - Vals64.push_back(VE.getValueID(SI.getCondition())); + pushValue64(SI.getCondition(), InstID, Vals64, VE); Vals64.push_back(VE.getValueID(SI.getDefaultDest())); Vals64.push_back(SI.getNumCases()); for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); @@ -1214,7 +1238,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::IndirectBr: Code = bitc::FUNC_CODE_INST_INDIRECTBR; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) + // Encode the address operand as relative, but not the basic blocks. + pushValue(I.getOperand(0), InstID, Vals, VE); + for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; @@ -1233,7 +1259,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param. + pushValue(I.getOperand(i), InstID, Vals, VE); // fixed param. // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { @@ -1255,12 +1281,19 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::PHI: { const PHINode &PN = cast(I); Code = bitc::FUNC_CODE_INST_PHI; - Vals.push_back(VE.getTypeID(PN.getType())); + // With the newer instruction encoding, forward references could give + // negative valued IDs. This is most common for PHIs, so we use + // signed VBRs. + SmallVector Vals64; + Vals64.push_back(VE.getTypeID(PN.getType())); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - Vals.push_back(VE.getValueID(PN.getIncomingValue(i))); - Vals.push_back(VE.getValueID(PN.getIncomingBlock(i))); + pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE); + Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } - break; + // Emit a Vals64 vector and exit. + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + Vals64.clear(); + return; } case Instruction::LandingPad: { @@ -1310,7 +1343,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, else Code = bitc::FUNC_CODE_INST_STORE; PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr - Vals.push_back(VE.getValueID(I.getOperand(0))); // val. + pushValue(I.getOperand(0), InstID, Vals, VE); // val. Vals.push_back(Log2_32(cast(I).getAlignment())+1); Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { @@ -1321,8 +1354,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::AtomicCmpXchg: Code = bitc::FUNC_CODE_INST_CMPXCHG; PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr - Vals.push_back(VE.getValueID(I.getOperand(1))); // cmp. - Vals.push_back(VE.getValueID(I.getOperand(2))); // newval. + pushValue(I.getOperand(1), InstID, Vals, VE); // cmp. + pushValue(I.getOperand(2), InstID, Vals, VE); // newval. Vals.push_back(cast(I).isVolatile()); Vals.push_back(GetEncodedOrdering( cast(I).getOrdering())); @@ -1332,7 +1365,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::AtomicRMW: Code = bitc::FUNC_CODE_INST_ATOMICRMW; PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr - Vals.push_back(VE.getValueID(I.getOperand(1))); // val. + pushValue(I.getOperand(1), InstID, Vals, VE); // val. 
Vals.push_back(GetEncodedRMWOperation( cast(I).getOperation())); Vals.push_back(cast(I).isVolatile()); @@ -1357,8 +1390,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee // Emit value #'s for the fixed parameters. - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { + // Check for labels (can happen with asm labels). + if (FTy->getParamType(i)->isLabelTy()) + Vals.push_back(VE.getValueID(CI.getArgOperand(i))); + else + pushValue(CI.getArgOperand(i), InstID, Vals, VE); // fixed param. + } // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { @@ -1371,7 +1409,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::VAArg: Code = bitc::FUNC_CODE_INST_VAARG; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty - Vals.push_back(VE.getValueID(I.getOperand(0))); // valist. + pushValue(I.getOperand(0), InstID, Vals, VE); // valist. Vals.push_back(VE.getTypeID(I.getType())); // restype. break; } @@ -1513,8 +1551,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, // Emit blockinfo, which defines the standard abbreviations etc. static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { // We only want to emit block info records for blocks that have multiple - // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other - // blocks can defined their abbrevs inline. + // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. + // Other blocks can define their abbrevs inline. Stream.EnterBlockInfoBlock(2); { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings. @@ -1772,12 +1810,10 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); - // Emit the version number if it is non-zero. - if (CurVersion) { - SmallVector Vals; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); - } + SmallVector Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // Analyze the module, enumerating globals, functions, etc. 
ValueEnumerator VE(M); diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index a6ca536..75468e6 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -78,9 +78,9 @@ private: unsigned FirstFuncConstantID; unsigned FirstInstID; - - ValueEnumerator(const ValueEnumerator &); // DO NOT IMPLEMENT - void operator=(const ValueEnumerator &); // DO NOT IMPLEMENT + + ValueEnumerator(const ValueEnumerator &) LLVM_DELETED_FUNCTION; + void operator=(const ValueEnumerator &) LLVM_DELETED_FUNCTION; public: ValueEnumerator(const Module *M); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 205480a..7a1c049 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( --R; const unsigned NewSuperReg = Order[R]; // Don't consider non-allocatable registers - if (!RegClassInfo.isAllocatable(NewSuperReg)) continue; + if (!MRI.isAllocatable(NewSuperReg)) continue; // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; @@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!RegClassInfo.isAllocatable(AntiDepReg)) { + if (!MRI.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 32ad34a..7cde136 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); std::pair HintPair = VRM.getRegInfo().getRegAllocationHint(VirtReg); + const MachineRegisterInfo &MRI = VRM.getRegInfo(); // HintPair.second is a register, phys or virt. Hint = HintPair.second; @@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, unsigned *P = new unsigned[Order.size()]; Begin = P; for (unsigned i = 0; i != Order.size(); ++i) - if (!RCI.isReserved(Order[i])) + if (!MRI.isReserved(Order[i])) *P++ = Order[i]; End = P; @@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, // The hint must be a valid physreg for allocation. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || RCI.isReserved(Hint))) + !RC->contains(Hint) || MRI.isReserved(Hint))) Hint = 0; } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 447f398..5162ad7 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -21,7 +21,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/ErrorHandling.h" @@ -79,7 +79,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. 
if (StructType *STy = dyn_cast(Ty)) { - const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy); for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); @@ -91,7 +91,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, // Given an array type, recursively traverse the elements. if (ArrayType *ATy = dyn_cast(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, StartingOffset + i * EltSize); @@ -314,11 +314,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) != + AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias)) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + if (CallerRetAttr.hasAttribute(Attributes::ZExt) || + CallerRetAttr.hasAttribute(Attributes::SExt)) return false; // Otherwise, make sure the unmodified return value of I is the return value. @@ -354,11 +356,13 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (CallerRetAttr & ~Attribute::NoAlias) + if (AttrBuilder(CallerRetAttr) + .removeAttribute(Attributes::NoAlias).hasAttributes()) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + if (CallerRetAttr.hasAttribute(Attributes::ZExt) || + CallerRetAttr.hasAttribute(Attributes::SExt)) return false; // Check if the only use is a function return node. 
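A note on the bitcode changes earlier in this patch: instruction operands are now encoded as value IDs relative to the current instruction number rather than as absolute IDs, and PHI operands, which may forward-reference instructions not yet emitted, use sign-rotated VBRs because their deltas can be negative. The standalone sketch below illustrates that round-trip; signRotate and signUnrotate are hypothetical stand-ins for the emitSignedInt64 and decodeSignRotatedValue logic shown above, not names from the patch itself.

    #include <cassert>
    #include <cstdint>

    // Sign-rotate a signed delta so small magnitudes become small unsigned
    // numbers suitable for VBR: 0 -> 0, 1 -> 2, -1 -> 3, 2 -> 4, -2 -> 5, ...
    static uint64_t signRotate(int64_t V) {
      return V >= 0 ? (uint64_t(V) << 1) : ((uint64_t(-V) << 1) | 1);
    }

    // Inverse transform, mirroring decodeSignRotatedValue in the reader.
    static int64_t signUnrotate(uint64_t V) {
      return (V & 1) ? -int64_t(V >> 1) : int64_t(V >> 1);
    }

    int main() {
      // Ordinary backward reference: instruction 10 uses value ID 7, which
      // is now stored as the relative ID 3 (what PushValueAndType pushes).
      unsigned InstID = 10, ValID = 7;
      uint64_t Stored = InstID - ValID;
      assert(InstID - Stored == ValID);   // reader side: getValue/popValue

      // PHI forward reference: value ID 12 is not defined yet, so the
      // delta 10 - 12 = -2 is negative and is emitted sign-rotated
      // (pushValueSigned on the writer, getValueSigned on the reader).
      unsigned FwdValID = 12;
      uint64_t Rot = signRotate(int64_t(InstID) - int64_t(FwdValID)); // 5
      assert(InstID - signUnrotate(Rot) == FwdValID);                 // 12
      return 0;
    }

The design intuition, as the UseRelativeIDs comment suggests, is that operands usually refer to recently defined values, so the deltas stay small and the variable-width encoding gets more compact than absolute IDs would allow.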
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index bf5d8c4..b2ebf04 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d9be7a1..d74a703 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -33,7 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -67,7 +67,7 @@ static gcp_map_type &getGCMap(void *&P) { /// getGVAlignmentLog2 - Return the alignment to use for the specified global /// value in log2 form. This rounds up to the preferred alignment if possible /// and legal. -static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, unsigned InBits = 0) { unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast(GV)) @@ -131,9 +131,9 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { } -/// getTargetData - Return information about data layout. -const TargetData &AsmPrinter::getTargetData() const { - return *TM.getTargetData(); +/// getDataLayout - Return information about data layout. +const DataLayout &AsmPrinter::getDataLayout() const { + return *TM.getDataLayout(); } /// getCurrentSection() - Return the current section we are emitting to. @@ -160,7 +160,7 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(OutContext, *TM.getTargetData()); + Mang = new Mangler(OutContext, *TM.getDataLayout()); // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -213,16 +213,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkerPrivateWeakDefAutoLinkage) + GlobalValue::LinkOnceODRAutoHideLinkage) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else @@ -280,7 +280,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. 
Overaligning a global @@ -312,8 +312,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - if (MAI->getLCOMMDirectiveType() != LCOMM::None && - (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) { + if (Align == 1 || + MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); return; @@ -482,9 +482,8 @@ void AsmPrinter::EmitFunctionEntryLabel() { "' label emitted multiple times to assembly file"); } - -/// EmitComments - Pretty-print comments for instructions. -static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { +/// emitComments - Pretty-print comments for instructions. +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); @@ -519,16 +518,16 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { CommentOS << " Reload Reuse\n"; } -/// EmitImplicitDef - This method emits the specified machine instruction +/// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. -static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { +static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { unsigned RegNo = MI->getOperand(0).getReg(); AP.OutStreamer.AddComment(Twine("implicit-def: ") + AP.TM.getRegisterInfo()->getName(RegNo)); AP.OutStreamer.AddBlankLine(); } -static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { +static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { std::string Str = "kill:"; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); @@ -541,10 +540,10 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer.AddBlankLine(); } -/// EmitDebugValueComment - This method handles the target-independent form +/// emitDebugValueComment - This method handles the target-independent form /// of DBG_VALUE, returning true if it was able to do so. A false return /// means the target will need to handle MI in EmitInstruction. -static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { +static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // This code handles only the 3-operand target-independent form. 
if (MI->getNumOperands() != 3) return false; @@ -674,7 +673,7 @@ void AsmPrinter::EmitFunctionBody() { } if (isVerbose()) - EmitComments(*II, OutStreamer.GetCommentOS()); + emitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: @@ -690,15 +689,15 @@ void AsmPrinter::EmitFunctionBody() { break; case TargetOpcode::DBG_VALUE: if (isVerbose()) { - if (!EmitDebugValueComment(II, *this)) + if (!emitDebugValueComment(II, *this)) EmitInstruction(II); } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) EmitImplicitDef(II, *this); + if (isVerbose()) emitImplicitDef(II, *this); break; case TargetOpcode::KILL: - if (isVerbose()) EmitKill(II, *this); + if (isVerbose()) emitKill(II, *this); break; default: if (!TM.hasMCUseLoc()) @@ -992,7 +991,7 @@ void AsmPrinter::EmitConstantPool() { Kind = SectionKind::getReadOnlyWithRelLocal(); break; case 0: - switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) { case 4: Kind = SectionKind::getMergeableConst4(); break; case 8: Kind = SectionKind::getMergeableConst8(); break; case 16: Kind = SectionKind::getMergeableConst16();break; @@ -1038,7 +1037,7 @@ void AsmPrinter::EmitConstantPool() { OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/); Type *Ty = CPE.getType(); - Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); + Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty); OutStreamer.EmitLabel(GetCPISymbol(CPI)); if (CPE.isMachineConstantPoolEntry()) @@ -1081,7 +1080,12 @@ void AsmPrinter::EmitJumpTableInfo() { JTInDiffSection = true; } - EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); + EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout()))); + + // Jump tables in code sections are marked with a data_region directive + // where that's supported. + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector &JTBBs = JT[JTI].MBBs; @@ -1123,6 +1127,8 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the @@ -1190,7 +1196,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); - unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); + unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout()); OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -1292,7 +1298,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), priority_order); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { @@ -1408,7 +1414,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // if required for correctness. 
// void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { - if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1422,9 +1428,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { // Constant emission. //===----------------------------------------------------------------------===// -/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. +/// lowerConstant - Lower the specified LLVM Constant to an MCExpr. /// -static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { +static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; if (CV->isNullValue() || isa(CV)) @@ -1447,12 +1453,12 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using TargetData as a + // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) - return LowerConstant(C, AP); + return lowerConstant(C, AP); // Otherwise report the problem to the user. { @@ -1464,21 +1470,20 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address const Constant *PtrVal = CE->getOperand(0); SmallVector IdxVec(CE->op_begin()+1, CE->op_end()); int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); - const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); + const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); if (Offset == 0) return Base; // Truncate/sext the offset to the pointer size. - if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } + unsigned Width = TD.getPointerSizeInBits(); + if (Width < 64) + Offset = SignExtend64(Offset, Width); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); @@ -1491,26 +1496,26 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { // is reasonable to treat their delta as a 32-bit value. // FALL THROUGH. case Instruction::BitCast: - return LowerConstant(CE->getOperand(0), AP); + return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), false/*ZExt*/); - return LowerConstant(Op, AP); + return lowerConstant(Op, AP); } case Instruction::PtrToInt: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. 
Constant *Op = CE->getOperand(0); Type *Ty = CE->getType(); - const MCExpr *OpExpr = LowerConstant(Op, AP); + const MCExpr *OpExpr = lowerConstant(Op, AP); // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. @@ -1536,8 +1541,8 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); - const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); + const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); @@ -1554,7 +1559,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, +static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is @@ -1578,7 +1583,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantInt *CI = dyn_cast(V)) { if (CI->getBitWidth() > 64) return -1; - uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType()); uint64_t Value = CI->getZExtValue(); // Make sure the constant is at least 8 bits long and has a power @@ -1616,13 +1621,13 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { return -1; } -static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, +static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, unsigned AddrSpace,AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType()); + uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); @@ -1672,7 +1677,7 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } } - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); unsigned Size = TD.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1681,28 +1686,28 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, +static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); + uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); } } -static void EmitGlobalConstantVector(const ConstantVector *CV, +static void emitGlobalConstantVector(const ConstantVector *CV, unsigned AddrSpace, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); unsigned Size = TD.getTypeAllocSize(CV->getType()); unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1710,10 +1715,10 @@ static void EmitGlobalConstantVector(const ConstantVector *CV, AP.OutStreamer.EmitZeros(Padding, AddrSpace); } -static void EmitGlobalConstantStruct(const ConstantStruct *CS, +static void emitGlobalConstantStruct(const ConstantStruct *CS, unsigned AddrSpace, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! - const TargetData *TD = AP.TM.getTargetData(); + const DataLayout *TD = AP.TM.getDataLayout(); unsigned Size = TD->getTypeAllocSize(CS->getType()); const StructLayout *Layout = TD->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; @@ -1727,7 +1732,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - EmitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AddrSpace, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well @@ -1738,7 +1743,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, "Layout of constant struct may be incorrect!"); } -static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, +static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { if (CFP->getType()->isHalfTy()) { if (AP.isVerbose()) { @@ -1793,7 +1798,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, << DoubleVal.convertToDouble() << '\n'; } - if (AP.TM.getTargetData()->isBigEndian()) { + if (AP.TM.getDataLayout()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); } else { @@ -1802,7 +1807,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, } // Emit the tail padding for the long double. - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - TD.getTypeStoreSize(CFP->getType()), AddrSpace); return; @@ -1814,7 +1819,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, // API needed to prevent premature destruction. 
APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); - if (AP.TM.getTargetData()->isBigEndian()) { + if (AP.TM.getDataLayout()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); } else { @@ -1823,9 +1828,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, } } -static void EmitGlobalConstantLargeInt(const ConstantInt *CI, +static void emitGlobalConstantLargeInt(const ConstantInt *CI, unsigned AddrSpace, AsmPrinter &AP) { - const TargetData *TD = AP.TM.getTargetData(); + const DataLayout *TD = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); @@ -1839,9 +1844,9 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, } } -static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, +static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - const TargetData *TD = AP.TM.getTargetData(); + const DataLayout *TD = AP.TM.getDataLayout(); uint64_t Size = TD->getTypeAllocSize(CV->getType()); if (isa(CV) || isa(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); @@ -1858,13 +1863,13 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: - EmitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AddrSpace, AP); return; } } if (const ConstantFP *CFP = dyn_cast(CV)) - return EmitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AddrSpace, AP); if (isa(CV)) { AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); @@ -1872,19 +1877,19 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, } if (const ConstantDataSequential *CDS = dyn_cast(CV)) - return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); if (const ConstantArray *CVA = dyn_cast(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AddrSpace, AP); if (const ConstantExpr *CE = dyn_cast(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have @@ -1892,23 +1897,23 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, // that way. Constant *New = ConstantFoldConstantExpression(CE, TD); if (New && New != CE) - return EmitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AddrSpace, AP); } } if (const ConstantVector *V = dyn_cast(CV)) - return EmitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. 
void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - EmitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, AddrSpace, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. @@ -2023,8 +2028,8 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, } } -/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks. -static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, +/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks. +static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, const MachineLoopInfo *LI, const AsmPrinter &AP) { // Add loop depth information @@ -2090,7 +2095,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { if (const BasicBlock *BB = MBB->getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); + emitBasicBlockLoopComments(*MBB, LI, *this); } // Print the main label for the block. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 90d511c..d94e1fe 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -18,7 +18,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { switch (Encoding & 0x07) { default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize(); + case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; case dwarf::DW_EH_PE_udata8: return 8; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index db43b06..50f0fc3 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -43,10 +43,10 @@ namespace { }; } -/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an /// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo /// struct above. -static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { +static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { SrcMgrDiagInfo *DiagInfo = static_cast(diagInfo); assert(DiagInfo && "Diagnostic context not passed down?"); @@ -68,7 +68,8 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. 
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, + InlineAsm::AsmDialect Dialect) const { assert(!Str.empty() && "Can't emit empty inline asm block"); // Remember if the buffer is nul terminated or not so we can avoid a copy. @@ -91,12 +92,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { - // If the source manager has an issue, we arrange for SrcMgrDiagHandler + // If the source manager has an issue, we arrange for srcMgrDiagHandler // to be invoked, getting DiagInfo passed into it. DiagInfo.LocInfo = LocMDNode; DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); - SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo); + SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo); HasDiagHandler = true; } @@ -126,6 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); + Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. @@ -135,71 +137,113 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { report_fatal_error("Error parsing inline asm\n"); } +static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + AsmPrinter *AP, unsigned LocCookie, + raw_ostream &OS) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; -/// EmitInlineAsm - This method formats and emits the specified machine -/// instruction that is an inline asm. -void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { - assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); - + const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); - // Count the number of register definitions to find the asm string. - unsigned NumDefs = 0; - for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); - ++NumDefs) - assert(NumDefs != NumOperands-2 && "No asm string?"); + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; - assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; - // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. - const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': + ++LastEmitted; // Consume second '$' character. + break; + } + if (Done) break; - // If this asmstr is empty, just print the #APP/#NOAPP markers. 
- // These are useful to see where empty asm's wound up. - if (AsmStr[0] == 0) { - // Don't emit the comments if writing to a .o file. - if (!OutStreamer.hasRawTextSupport()) return; + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); - return; - } + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; - // Emit the #APP start marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); - // Get the !srcloc metadata node if we have it, and decode the loc cookie from - // it. - unsigned LocCookie = 0; - const MDNode *LocMD = 0; - for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata() && - (LocMD = MI->getOperand(i-1).getMetadata()) && - LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast(LocMD->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } - } - } + // Okay, we finally have a value number. Ask the target to print this + // operand! + unsigned OpNo = InlineAsm::MIOp_FirstOperand; - // Emit the inline asm to a temporary string so we can emit it through - // EmitInlineAsm. - SmallString<256> StringData; - raw_svector_ostream OS(StringData); + bool Error = false; - OS << '\t'; + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } - // The variant of the current asmprinter. - int AsmPrinterVariant = MAI->getAssemblerDialect(); + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + break; + } + } + } + OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. +} +static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + int AsmPrinterVariant, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. 
+ unsigned NumOperands = MI->getNumOperands(); + + OS << '\t'; while (*LastEmitted) { switch (*LastEmitted) { @@ -272,7 +316,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { " string: '" + Twine(AsmStr) + "'"); std::string Val(StrStart, StrEnd); - PrintSpecial(MI, OS, Val.c_str()); + AP->PrintSpecial(MI, OS, Val.c_str()); LastEmitted = StrEnd+1; break; } @@ -340,13 +384,12 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // FIXME: What if the operand isn't an MBB, report error? OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { - AsmPrinter *AP = const_cast(this); if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant, + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, OS); } } @@ -363,7 +406,74 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), LocMD); +} + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + // Don't emit the comments if writing to a .o file. + if (!OutStreamer.hasRawTextSupport()) return; + + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); + return; + } + + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + // The variant of the current asmprinter. 
+ int AsmPrinterVariant = MAI->getAssemblerDialect(); + InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect(); + AsmPrinter *AP = const_cast(this); + if (InlineAsmVariant == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant, + AP, LocCookie, OS); + else + EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); + + EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. @@ -409,8 +519,8 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, /// instruction, using the specified assembler variant. Targets should /// override this to format as appropriate. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) { + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 3776848..4d73b3c 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -17,7 +17,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -182,6 +182,12 @@ void DIEValue::dump() { void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { unsigned Size = ~0U; switch (Form) { + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? 
+ if (Asm->OutStreamer.hasRawTextSupport()) + Asm->OutStreamer.EmitRawText(StringRef("")); + return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: Size = 1; break; @@ -193,7 +199,8 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; - case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break; + case dwarf::DW_FORM_addr: + Size = Asm->getDataLayout().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -203,6 +210,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { switch (Form) { + case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: return sizeof(int8_t); @@ -214,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); + case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -241,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getTargetData().getPointerSize(); + return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG @@ -265,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getTargetData().getPointerSize(); + return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index f93ea1b..28a96f3 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -214,9 +214,6 @@ namespace llvm { /// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *) { return true; } - #ifndef NDEBUG virtual void print(raw_ostream &O) = 0; void dump(); @@ -257,7 +254,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG @@ -286,7 +282,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIELabel *) { return true; } static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG @@ -313,7 +308,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. 
- static bool classof(const DIEDelta *) { return true; } static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG @@ -343,7 +337,6 @@ namespace llvm { } // Implement isa/cast/dyncast. - static bool classof(const DIEEntry *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG @@ -383,7 +376,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEBlock *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 454a923..05e0f2f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -133,8 +133,8 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { } } -// Walk through and emit the buckets for the table. This will look -// like a list of numbers of how many elements are in each bucket. +// Walk through and emit the buckets for the table. Each index is +// an offset into the list of hashes. void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 963b8cd..92d1bbe 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -237,8 +237,8 @@ private: #endif }; - DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT - void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index d975f1f..4fdd5ca 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -25,7 +25,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d30e5bb..2b07dda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -22,7 +22,7 @@ #include "llvm/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -51,6 +51,15 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { return Value; } +/// addFlag - Add a flag that is true. +void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { + if (!DD->useDarwinGDBCompat()) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, + DIEIntegerOne); + else + addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); +} + /// addUInt - Add an unsigned integer attribute data and value. 
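
// The classof(const DIEInteger *)-style overloads deleted above became dead
// weight once LLVM's isa<>/dyn_cast<> templates stopped consulting the
// derived-type form; only the overload taking the base pointer is needed.
// The surviving pattern, reduced to illustrative types:
struct DieValue {
  enum Kind { IntegerKind, LabelKind };
  explicit DieValue(Kind K) : TheKind(K) {}
  Kind getKind() const { return TheKind; }
private:
  Kind TheKind;
};

struct DieInteger : DieValue {
  DieInteger() : DieValue(IntegerKind) {}
  // One classof, on the base type; isa<DieInteger>(V) resolves through this.
  static bool classof(const DieValue *V) { return V->getKind() == IntegerKind; }
};
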
/// void CompileUnit::addUInt(DIE *Die, unsigned Attribute, @@ -501,7 +510,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { const char *FltPtr = (const char*)FltVal.getRawData(); int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); int Incr = (LittleEndian ? 1 : -1); int Start = (LittleEndian ? 0 : NumBytes - 1); int Stop = (LittleEndian ? NumBytes : -1); @@ -543,7 +552,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, const uint64_t *Ptr64 = Val.getRawData(); int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); // Output the constant to DWARF one byte at a time. for (int i = 0; i < NumBytes; i++) { @@ -794,7 +803,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_prototyped); } break; case dwarf::DW_TAG_structure_type: @@ -825,15 +834,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_declaration); + addFlag(ElemDie, dwarf::DW_AT_external); addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); @@ -883,7 +892,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_block); DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) @@ -895,8 +904,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isObjcClassComplete()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, - dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. @@ -929,7 +937,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // If we're a forward decl, say so. if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_declaration); // Add source line info if available. if (!CTy.isForwardDecl()) @@ -1028,8 +1036,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // AT_specification code in order to work around a bug in older // gdbs that requires the linkage name to resolve multiple template // functions. + // TODO: Remove this set of code when we get rid of the old gdb + // compatibility. 
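
// addConstantFPValue/addConstantValue above walk a constant's raw bytes in a
// direction chosen by the target's endianness (the Incr/Start/Stop triple).
// The same traversal on a plain buffer; emitBytesInTargetOrder is an
// illustrative stand-in for the loop around Asm->EmitInt8:
#include <cstdint>
#include <cstdio>

void emitBytesInTargetOrder(const uint8_t *Data, int NumBytes,
                            bool LittleEndian) {
  int Incr  = LittleEndian ? 1 : -1;           // walk up or down
  int Start = LittleEndian ? 0 : NumBytes - 1; // first byte to emit
  int Stop  = LittleEndian ? NumBytes : -1;    // one past the last byte
  for (int I = Start; I != Stop; I += Incr)
    std::printf(".byte 0x%02x\n", Data[I]);
}
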
StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) + if (!LinkageName.empty() && DD->useDarwinGDBCompat()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); @@ -1043,6 +1053,11 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return SPDie; } + // Add the linkage name if we have one. + if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); @@ -1055,7 +1070,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_prototyped); // Add Return Type. DICompositeType SPTy = SP.getType(); @@ -1079,7 +1094,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. @@ -1090,22 +1105,22 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); + DIType ATy = DIType(Args.getElement(i)); addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(Arg, dwarf::DW_AT_artificial); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_artificial); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_external); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); if (unsigned isa = Asm->getISAEncoding()) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); @@ -1168,7 +1183,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Add scoping info. if (!GV.isLocalToUnit()) - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(VariableDIE, dwarf::DW_AT_external); // Add line number info. 
addSourceLine(VariableDIE, GV); @@ -1193,8 +1208,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, - 1); + addFlag(VariableDIE, dwarf::DW_AT_declaration); addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); @@ -1213,7 +1227,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); SmallVector Idx(CE->op_begin()+1, CE->op_end()); addUInt(Block, 0, dwarf::DW_FORM_udata, - Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx)); + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } @@ -1260,7 +1274,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_GNU_vector); // Emit derived type. addType(&Buffer, CTy->getTypeDerivedFrom()); @@ -1333,8 +1347,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { } if (DV->isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, - dwarf::DW_FORM_flag, 1); + addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { DV->setDIE(VariableDie); @@ -1446,7 +1459,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { Offset -= FieldOffset; // Maybe we need to work from the other end. - if (Asm->getTargetData().isLittleEndian()) + if (Asm->getDataLayout().isLittleEndian()) Offset = FieldSize - (Offset + Size); addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index b4ff9e8..fad9b6e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -176,6 +176,9 @@ public: } public: + /// addFlag - Add a flag that is true to the DIE. + void addFlag(DIE *Die, unsigned Attribute); + /// addUInt - Add an unsigned integer attribute data and value. /// void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); @@ -280,8 +283,8 @@ public: /// for the given DITemplateTypeParameter. DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateValueParameter. + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create + /// new DIE for the given DITemplateValueParameter. 
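
// createMemberDIE above flips a bit field's offset on little-endian targets
// because DW_AT_bit_offset counts from the most significant bit of the
// storage unit.  The arithmetic in isolation (dwarfBitOffset is an
// illustrative name):
#include <cassert>
#include <cstdint>

// FieldSize and Size are in bits; Offset is from the least significant bit.
uint64_t dwarfBitOffset(uint64_t FieldSize, uint64_t Offset, uint64_t Size,
                        bool LittleEndian) {
  assert(Offset + Size <= FieldSize && "bit field exceeds its storage unit");
  return LittleEndian ? FieldSize - (Offset + Size) : Offset;
}
// Example: a 3-bit field at bit 0 of a 32-bit unit gets DW_AT_bit_offset 29
// on a little-endian target.
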
   DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
 
   /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 649684a..367b523 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -27,7 +27,7 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -54,9 +54,29 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
      cl::desc("Make an absence of debug location information explicit."),
      cl::init(false));
 
-static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+namespace {
+  enum DefaultOnOff {
+    Default, Enable, Disable
+  };
+}
+
+static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
      cl::desc("Output prototype dwarf accelerator tables."),
-     cl::init(false));
+     cl::values(
+                clEnumVal(Default, "Default for platform"),
+                clEnumVal(Enable, "Enabled"),
+                clEnumVal(Disable, "Disabled"),
+                clEnumValEnd),
+     cl::init(Default));
+
+static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
+     cl::desc("Compatibility with Darwin gdb."),
+     cl::values(
+                clEnumVal(Default, "Default for platform"),
+                clEnumVal(Enable, "Enabled"),
+                clEnumVal(Disable, "Disabled"),
+                clEnumValEnd),
+     cl::init(Default));
 
 namespace {
   const char *DWARFGroupName = "DWARF Emission";
@@ -135,10 +155,25 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
 
-  // Turn on accelerator tables for Darwin.
-  if (Triple(M->getTargetTriple()).isOSDarwin())
-    DwarfAccelTables = true;
-
+  // Turn on accelerator tables and older gdb compatibility
+  // for Darwin.
+  bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+  if (DarwinGDBCompat == Default) {
+    if (isDarwin)
+      isDarwinGDBCompat = true;
+    else
+      isDarwinGDBCompat = false;
+  } else
+    isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+
+  if (DwarfAccelTables == Default) {
+    if (isDarwin)
+      hasDwarfAccelTables = true;
+    else
+      hasDwarfAccelTables = false;
+  } else
+    hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
     beginModule(M);
@@ -272,44 +307,51 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
   assert(SPDie && "Unable to find subprogram DIE!");
   DISubprogram SP(SPNode);
 
-  DISubprogram SPDecl = SP.getFunctionDeclaration();
-  if (!SPDecl.isSubprogram()) {
-    // There is not any need to generate specification DIE for a function
-    // defined at compile unit level. If a function is defined inside another
-    // function then gdb prefers the definition at top level and but does not
-    // expect specification DIE in parent function. So avoid creating
-    // specification DIE for a function defined inside a function.
-    if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
-        !SP.getContext().isFile() &&
-        !isSubprogramContext(SP.getContext())) {
-      SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
-      // Add arguments.
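
// The Default/Enable/Disable cl::opt pattern introduced earlier in this
// file's diff lets a flag force a feature on or off while deferring to a
// per-platform default otherwise.  The constructor logic above reduces to one
// helper; resolveOnOff and the enumerator names here are illustrative, not
// part of the patch:
enum DefaultOnOffSketch { DefaultSetting, EnableSetting, DisableSetting };

bool resolveOnOff(DefaultOnOffSketch Opt, bool PlatformDefault) {
  if (Opt == DefaultSetting)
    return PlatformDefault; // e.g. true when the triple is Darwin
  return Opt == EnableSetting;
}
// hasDwarfAccelTables = resolveOnOff(accelTablesFlag, isDarwin);
// isDarwinGDBCompat   = resolveOnOff(gdbCompatFlag,   isDarwin);
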
- DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - SPCU->addType(Arg, ATy); - if (ATy.isArtificial()) - SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); - } - } - // Pick up abstract subprogram DIE. + // If we're updating an abstract DIE, then we will be adding the children and + // object pointer later on. But what we don't want to do is process the + // concrete DIE twice. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); + } else { + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (!SPDecl.isSubprogram()) { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. + if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(Args.getElement(i)); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addFlag(Arg, dwarf::DW_AT_artificial); + if (ATy.isObjectPointer()) + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, Arg); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } + } } SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, @@ -346,7 +388,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. 
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getTargetData().getPointerSize()); + * Asm->getDataLayout().getPointerSize()); for (SmallVector::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -386,7 +428,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DISubprogram InlinedSP = getDISubprogram(DS); DIE *OriginDIE = TheCU->getDIE(InlinedSP); if (!OriginDIE) { - DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); + DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram."); return NULL; } @@ -395,7 +437,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for a inlined scope!"); + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); @@ -412,7 +454,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getTargetData().getPointerSize()); + * Asm->getDataLayout().getPointerSize()); for (SmallVector::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -461,21 +503,26 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { return NULL; SmallVector Children; + DIE *ObjectPointer = NULL; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) + TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); + if (ArgDV->isObjectPointer()) ObjectPointer = Arg; + } // Collect lexical scope children first. 
const SmallVector &Variables = ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) + TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); + if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; + } const SmallVector &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) @@ -509,6 +556,10 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); + if (DS.isSubprogram() && ObjectPointer != NULL) + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, ObjectPointer); + if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); @@ -556,7 +607,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this); + CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, + Asm, this); NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); @@ -575,7 +627,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) - NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) @@ -755,7 +807,7 @@ void DwarfDebug::endModule() { LexicalScope *Scope = new LexicalScope(NULL, DIDescriptor(SP), NULL, false); DeadFnScopeMap[SP] = Scope; - + // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); @@ -802,9 +854,9 @@ void DwarfDebug::endModule() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); // End text sections. - for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { - Asm->OutStreamer.SwitchSection(SectionMap[i]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i)); + for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { + Asm->OutStreamer.SwitchSection(SectionMap[I]); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); } // Compute DIE offsets and sizes. @@ -816,8 +868,8 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a dwarf accelerator table sections. - if (DwarfAccelTables) { + // Emit info into the dwarf accelerator table sections. + if (useDwarfAccelTables()) { emitAccelNames(); emitAccelObjC(); emitAccelNamespaces(); @@ -825,7 +877,10 @@ void DwarfDebug::endModule() { } // Emit info into a debug pubtypes section. - emitDebugPubTypes(); + // TODO: When we don't need the option anymore we can + // remove all of the code that adds to the table. + if (useDarwinGDBCompat()) + emitDebugPubTypes(); // Emit info into a debug loc section. emitDebugLoc(); @@ -840,7 +895,11 @@ void DwarfDebug::endModule() { emitDebugMacInfo(); // Emit inline info. 
- emitDebugInlineInfo(); + // TODO: When we don't need the option anymore we + // can remove all of the code that this section + // depends upon. + if (useDarwinGDBCompat()) + emitDebugInlineInfo(); // Emit info into a debug str section. emitDebugStr(); @@ -1014,7 +1073,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (AbsVar) AbsVar->setMInsn(MInsn); - // Simple ranges that are fully coalesced. + // Simplify ranges that are fully coalesced. if (History.size() <= 1 || (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) { RegVar->setMInsn(MInsn); @@ -1267,7 +1326,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Coalesce identical entries at the end of History. if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { - DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n" + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" << "\t" << *Prev << "\t" << *History[History.size() - 2] << "\n"); History.pop_back(); @@ -1283,7 +1342,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. - DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n" + DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" << "\t" << *Prev << "\n"); History.pop_back(); } @@ -1300,9 +1359,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MI->isLabel()) AtBlockEntry = false; - // First known non DBG_VALUE location marks beginning of function - // body. - if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()) + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + if (!MI->getFlag(MachineInstr::FrameSetup) && + (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())) PrologEndLoc = MI->getDebugLoc(); // Check if the instruction clobbers any registers with debug vars. @@ -1382,7 +1442,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_LINE_DEFAULT_IS_STMT ? 
DWARF2_FLAG_IS_STMT : 0); + 0); } } @@ -1439,8 +1499,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) - TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); @@ -1710,7 +1769,7 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), DwarfAbbrevSectionSym); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); emitDIE(Die); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); @@ -1756,14 +1815,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(0); Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); Asm->OutStreamer.AddComment("DW_LNE_set_address"); Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->OutStreamer.AddComment("Section end label"); Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getTargetData().getPointerSize(), + Asm->getDataLayout().getPointerSize(), 0/*AddrSpace*/); // Mark end of matrix. @@ -1992,7 +2051,7 @@ void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getTargetData().getPointerSize(); + unsigned char Size = Asm->getDataLayout().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; for (SmallVector::iterator @@ -2089,7 +2148,7 @@ void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. 
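
// The beginFunction change earlier in this patch stops treating frame-setup
// instructions as the start of the function body: prologue_end now lands on
// the first instruction that is neither a DBG_VALUE nor flagged
// MachineInstr::FrameSetup and that carries a debug location.  The scan,
// sketched over a simplified instruction record rather than MachineInstrs:
#include <cstddef>
#include <vector>

struct InsnSketch {
  bool IsDebugValue;  // DBG_VALUE pseudo-instruction
  bool IsFrameSetup;  // part of the prologue's frame setup
  int  Line;          // 0 when no debug location is attached
};

// Returns the source line for prologue_end, or 0 if none qualifies.
int findPrologueEndLine(const std::vector<InsnSketch> &Body) {
  for (std::size_t I = 0; I != Body.size(); ++I) {
    if (Body[I].IsDebugValue || Body[I].IsFrameSetup)
      continue;
    if (Body[I].Line != 0)
      return Body[I].Line; // first real instruction with a location
  }
  return 0;
}
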
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); - unsigned char Size = Asm->getTargetData().getPointerSize(); + unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVector::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { @@ -2147,7 +2206,7 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Dwarf Version"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); for (SmallVector::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { @@ -2178,7 +2237,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getTargetData().getPointerSize(),0); + Asm->getDataLayout().getPointerSize(),0); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index d1d6512..61d9a51 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -21,9 +21,9 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLoc.h" @@ -96,7 +96,8 @@ typedef struct DotDebugLocEntry { DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, + const ConstantInt *IPtr) : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } @@ -158,11 +159,19 @@ public: bool isArtificial() const { if (Var.isArtificial()) return true; - if (Var.getTag() == dwarf::DW_TAG_arg_variable - && getType().isArtificial()) + if (getType().isArtificial()) return true; return false; } + + bool isObjectPointer() const { + if (Var.isObjectPointer()) + return true; + if (getType().isObjectPointer()) + return true; + return false; + } + bool variableHasComplexAddress() const { assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); @@ -222,7 +231,7 @@ class DwarfDebug { /// SectionMap - Provides a unique id per text section. /// - UniqueVector SectionMap; + SetVector SectionMap; /// CurrentFnArguments - List of Arguments (DbgValues) for current function. SmallVector CurrentFnArguments; @@ -307,6 +316,9 @@ class DwarfDebug { // table for the same directory as DW_at_comp_dir. StringRef CompilationDir; + // A holder for the DarwinGDBCompat flag so that the compile unit can use it. + bool isDarwinGDBCompat; + bool hasDwarfAccelTables; private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -520,6 +532,11 @@ public: /// getStringPoolEntry - returns an entry into the string pool with the given /// string text. MCSymbol *getStringPoolEntry(StringRef Str); + + /// useDarwinGDBCompat - returns whether or not to limit some of our debug + /// output to the limitations of darwin gdb. 
+ bool useDarwinGDBCompat() { return isDarwinGDBCompat; } + bool useDwarfAccelTables() { return hasDwarfAccelTables; } }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 70cc2e5..08fb6b3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() { // that we're omitting that bit. TTypeEncoding = dwarf::DW_EH_PE_omit; // dwarf::DW_EH_PE_absptr - TypeFormatSize = Asm->getTargetData().getPointerSize(); + TypeFormatSize = Asm->getDataLayout().getPointerSize(); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to // pick a type encoding for them. We're about to emit a list of pointers to diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 75f6056..fe9e493 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -43,26 +43,6 @@ protected: /// MMI - Collected machine module information. MachineModuleInfo *MMI; - /// EmitExceptionTable - Emit landing pads and actions. - /// - /// The general organization of the table is complex, but the basic concepts - /// are easy. First there is a header which describes the location and - /// organization of the three components that follow. - /// 1. The landing pad site information describes the range of code covered - /// by the try. In our case it's an accumulation of the ranges covered - /// by the invokes in the try. There is also a reference to the landing - /// pad that handles the exception once processed. Finally an index into - /// the actions table. - /// 2. The action table, in our case, is composed of pairs of type ids - /// and next action offset. Starting with the action index from the - /// landing pad site, each type Id is checked for a match to the current - /// exception. If it matches then the exception and type id are passed - /// on to the landing pad. Otherwise the next action is looked up. This - /// chain is terminated with a next action of zero. If no type id is - /// found the frame is unwound and handling continues. - /// 3. Type id table contains references to all the C++ typeinfo for all - /// catches in the function. This tables is reversed indexed base 1. - /// SharedTypeIds - How many leading type ids two landing pads have in common. static unsigned SharedTypeIds(const LandingPadInfo *L, const LandingPadInfo *R); @@ -119,6 +99,26 @@ protected: const RangeMapType &PadMap, const SmallVectorImpl &LPs, const SmallVectorImpl &FirstActions); + + /// EmitExceptionTable - Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. 
Finally an index into + /// the actions table. + /// 2. The action table, in our case, is composed of pairs of type ids + /// and next action offset. Starting with the action index from the + /// landing pad site, each type Id is checked for a match to the current + /// exception. If it matches then the exception and type id are passed + /// on to the landing pad. Otherwise the next action is looked up. This + /// chain is terminated with a next action of zero. If no type id is + /// found the frame is unwound and handling continues. + /// 3. Type id table contains references to all the C++ typeinfo for all + /// catches in the function. This tables is reversed indexed base 1. void EmitExceptionTable(); public: diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 1153817..f7c0119 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -20,7 +20,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallString.h" @@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// either condition is detected in a function which uses the GC. /// void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize(); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(getModule(), AP, "code_end"); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index b83aa5a..70742a8 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index fb65bb7..6f4c5a2 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -357,9 +357,8 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; while (I2->isDebugValue()) { - if (I2 == MBB2->begin()) { + if (I2 == MBB2->begin()) return TailLen; - } --I2; } ++I2; @@ -482,21 +481,19 @@ bool BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { if (getHash() < o.getHash()) return true; - else if (getHash() > o.getHash()) + if (getHash() > o.getHash()) return false; - else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + if (getBlock()->getNumber() < o.getBlock()->getNumber()) return true; - else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + if (getBlock()->getNumber() > o.getBlock()->getNumber()) return false; - else { - // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing - // an object with itself. + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. 
#ifndef _GLIBCXX_DEBUG - llvm_unreachable("Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #else - return false; + return false; #endif - } } /// CountTerminators - Count the number of terminators in the given @@ -574,7 +571,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + MF->getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -1554,8 +1552,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); } else { - if (Uses.count(Reg)) { - Uses.erase(Reg); + if (Uses.erase(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2e189ad..fa6d4e1 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -45,6 +45,7 @@ add_llvm_library(LLVMCodeGen MachineCopyPropagation.cpp MachineCSE.cpp MachineDominators.cpp + MachinePostDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp MachineFunctionPass.cpp @@ -95,12 +96,14 @@ add_llvm_library(LLVMCodeGen SplitKit.cpp StackProtector.cpp StackSlotColoring.cpp + StackColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp TargetInstrInfoImpl.cpp TargetLoweringObjectFileImpl.cpp TargetOptionsImpl.cpp + TargetSchedule.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 939af3f..dee339a 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -9,7 +9,6 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/Function.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,8 +41,7 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); LiveIntervals &LIS = getAnalysis(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -166,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { continue; float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && LIS.isAllocatable(hint)) + if (hweight > bestPhys && mri.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 0b747fd..22b9140 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -18,7 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, if (MinAlign > 
(int)Align) Align = MinAlign; MF.getFrameInfo()->ensureMaxAlignment(Align); - TM.getTargetLowering()->HandleByVal(this, Size); + TM.getTargetLowering()->HandleByVal(this, Size, Align); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index fb2c2e8..a53f6f8 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCopyPropagationPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); @@ -56,6 +57,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegisterCoalescerPass(Registry); initializeSlotIndexesPass(Registry); initializeStackProtectorPass(Registry); + initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 99233df..d8e06c3 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { /// bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) + if (F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) return false; unsigned Align = TLI->getPrefLoopAlignment(); diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index a9de1c749..377b471 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector& SUnits, if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!RegClassInfo.isAllocatable(AntiDepReg)) + if (!MRI.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.test(AntiDepReg)) diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index b4394e8..8964269 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -33,7 +33,6 @@ namespace { const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; BitVector LivePhysRegs; - BitVector ReservedRegs; public: static char ID; // Pass identification, replacement for typeid @@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. - if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg)) + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { if (!MRI->use_nodbg_empty(Reg)) @@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); - // Treat reserved registers as always live. 
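
// CCState::HandleByVal above clamps the alignment, then carves the argument
// out of the stack with AllocateStack.  Bump allocation of that kind rounds
// the running offset up to the (power-of-two) alignment first; allocateStack
// here is a sketch, not the CCState member:
#include <cassert>
#include <cstdint>

uint64_t allocateStack(uint64_t &NextOffset, uint64_t Size, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "Align not a power of 2");
  NextOffset = (NextOffset + Align - 1) & ~(Align - 1); // round up
  uint64_t Offset = NextOffset;
  NextOffset += Size;
  return Offset; // byte offset assigned to this byval argument
}
// Example: from offset 4, an 8-byte argument with 8-byte alignment lands at
// offset 8 and leaves the next free byte at 16.
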
-  ReservedRegs = TRI->getReservedRegs(MF);
-
   // Loop over all instructions in all blocks, from bottom to top, so that it's
   // more likely that chains of dependent but ultimately dead instructions will
   // be cleaned up.
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
     MachineBasicBlock *MBB = &*I;
 
     // Start out assuming that reserved registers are live out of this block.
-    LivePhysRegs = ReservedRegs;
+    LivePhysRegs = MRI->getReservedRegs();
 
     // Also add any explicit live-out physregs for this block.
     if (!MBB->empty() && MBB->back().isReturn())
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index f9347ef..d5d8404 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -18,7 +18,6 @@
 #define DEBUG_TYPE "early-ifcvt"
 #include "MachineTraceMetrics.h"
-#include "llvm/Function.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
@@ -32,9 +31,9 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -775,11 +774,11 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
 
 bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
-               << "********** Function: "
-               << ((Value*)MF.getFunction())->getName() << '\n');
+               << "********** Function: " << MF.getName() << '\n');
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
-  SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
+  SchedModel =
+    MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
   MRI = &MF.getRegInfo();
   DomTree = &getAnalysis<MachineDominatorTree>();
   Loops = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -798,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
     if (tryConvertIf(I->getBlock()))
       Changed = true;
 
-  MF.verify(this, "After early if-conversion");
   return Changed;
 }
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index fee8e47..ed78f19 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -626,9 +626,12 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
   }
   dv->Instrs.push_back(mi);
 
-  // Finally set all defs and non-collapsed uses to dv.
-  for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
-    MachineOperand &mo = mi->getOperand(i);
+  // Finally set all defs and non-collapsed uses to dv. We must iterate through
+  // all the operators, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(), + ee = mi->operands_end(); + ii != ee; ++ii) { + MachineOperand &mo = *ii; if (!mo.isReg()) continue; int rx = regIndex(mo.getReg()); if (rx < 0) continue; @@ -654,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) { + if (MF->getRegInfo().isPhysRegUsed(*I)) { anyregs = true; break; } diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 7a17331..ffe4b63 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -190,8 +189,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 506b5cf..f4755bb 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/DominatorInternals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -387,9 +388,16 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { const TargetFrameLowering *TFI = TM->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); - for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), - RE = FI->roots_end(); RI != RE; ++RI) - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. 
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + ++RI; + } + } } bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 4214ba1..31e36f0 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "ifcvt" #include "BranchFolding.h" -#include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -282,7 +281,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"); + << MF.getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { DEBUG(dbgs() << " skipped\n"); @@ -997,14 +996,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; - if (Redefs.count(Reg)) { + if (!Redefs.insert(Reg)) { if (AddImpUse) // Treat predicated update as read + write. MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, true/*IsImp*/,false/*IsKill*/, false/*IsDead*/,true/*IsUndef*/)); } else { - Redefs.insert(Reg); for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Redefs.insert(*SubRegs); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 07e37af..37828a7 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -613,7 +613,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, propagateSiblingValue(SVI); } while (!WorkList.empty()); - // Look up the value we were looking for. We already did this lokup at the + // Look up the value we were looking for. We already did this lookup at the // top of the function, but SibValues may have been invalidated. SVI = SibValues.find(UseVNI); assert(SVI != SibValues.end() && "Didn't compute requested info"); @@ -863,7 +863,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. SmallVector, 8> Ops; - MIBundleOperands::RegInfo RI = + MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); if (RI.Tied) { markValueUsed(&VirtReg, ParentVNI); @@ -1142,7 +1142,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. SmallVector, 8> Ops; - MIBundleOperands::RegInfo RI = + MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. 
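
// The GC stack-root loop above switches to the erase-returns-next-iterator
// idiom so dead roots can be dropped mid-walk (removeStackRoot is assumed to
// behave like a sequence erase).  The same idiom on a std::vector:
#include <vector>

struct RootSketch { int FrameIndex; bool Dead; };

void pruneDeadRoots(std::vector<RootSketch> &Roots) {
  for (std::vector<RootSketch>::iterator I = Roots.begin();
       I != Roots.end();) {
    if (I->Dead)
      I = Roots.erase(I); // erase hands back the iterator past the element
    else
      ++I;                // advance only when nothing was removed
  }
}
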
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 8d2282a..6120ae56 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; template @@ -457,7 +457,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -468,7 +468,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -479,7 +479,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index cac0c83..24daafa 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -172,7 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget(); MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Context); - MAB = getTarget().createMCAsmBackend(getTargetTriple()); + MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); } MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, @@ -191,7 +191,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // emission fails. 
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -266,7 +266,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Ctx); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index d631726..defc127 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -687,8 +687,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " - << ((Value*)mf.getFunction())->getName() - << " **********\n"); + << mf.getName() << " **********\n"); bool Changed = collectDebugValues(mf); computeIntervals(); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 0a795e6..8585cbb 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "RegisterCoalescer.h" #include using namespace llvm; @@ -58,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { - assert(I->start == Def && "Cannot insert def, already live"); - assert(I->valno->def == Def && "Inconsistent existing value def"); + assert(I->valno->def == I->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, I->start); + if (Def != I->start) + I->start = I->valno->def = Def; return I->valno; } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); @@ -68,21 +77,6 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, return VNI; } -/// killedInRange - Return true if the interval has kills in [Start,End). -bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { - Ranges::const_iterator r = - std::lower_bound(ranges.begin(), ranges.end(), End); - - // Now r points to the first interval with start >= End, or ranges.end(). - if (r == ranges.begin()) - return false; - - --r; - // Now r points to the last interval with end <= End. - // r->end is the kill point. - return r->end >= Start && r->end < End; -} - // overlaps - Return true if the intersection of the two live intervals is // not empty. // @@ -142,6 +136,48 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } +bool LiveInterval::overlaps(const LiveInterval &Other, + const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty interval"); + if (Other.empty()) + return false; + + // Use binary searches to find initial positions. 
+ const_iterator I = find(Other.beginIndex()); + const_iterator IE = end(); + if (I == IE) + return false; + const_iterator J = Other.find(I->start); + const_iterator JE = Other.end(); + if (J == JE) + return false; + + for (;;) { + // J has just been advanced to satisfy: + assert(J->end >= I->start); + // Check for an overlap. + if (J->start < I->end) { + // I and J are overlapping. Find the later start. + SlotIndex Def = std::max(I->start, J->start); + // Allow the overlap if Def is a coalescable copy. + if (Def.isBlock() || + !CP.isCoalescable(Indexes.getInstructionFromIndex(Def))) + return true; + } + // Advance the iterator that ends first to check for more overlaps. + if (J->end > I->end) { + std::swap(I, J); + std::swap(IE, JE); + } + // Advance J until J->end >= I->start. + do + if (++J == JE) + return false; + while (J->end < I->start); + } +} + /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { @@ -399,7 +435,7 @@ void LiveInterval::join(LiveInterval &Other, // If we have to apply a mapping to our base interval assignment, rewrite it // now. - if (MustMapCurValNos) { + if (MustMapCurValNos && !empty()) { // Map the first live range. iterator OutIt = begin(); @@ -673,27 +709,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { return V2; } -void LiveInterval::Copy(const LiveInterval &RHS, - MachineRegisterInfo *MRI, - VNInfo::Allocator &VNInfoAllocator) { - ranges.clear(); - valnos.clear(); - std::pair Hint = MRI->getRegAllocationHint(RHS.reg); - MRI->setRegAllocationHint(reg, Hint.first, Hint.second); - - weight = RHS.weight; - for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { - const VNInfo *VNI = RHS.getValNumInfo(i); - createValueCopy(VNI, VNInfoAllocator); - } - for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { - const LiveRange &LR = RHS.ranges[i]; - addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); - } - - verify(); -} - unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const_iterator I = begin(), E = end(); I != E; ++I) @@ -705,9 +720,11 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveRange::dump() const { dbgs() << *this << "\n"; } +#endif void LiveInterval::print(raw_ostream &OS) const { if (empty()) @@ -740,9 +757,11 @@ void LiveInterval::print(raw_ostream &OS) const { } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveInterval::dump() const { dbgs() << *this << "\n"; } +#endif #ifndef NDEBUG void LiveInterval::verify() const { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index d0f8ae1..4e75d89 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -34,6 +34,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "LiveRangeCalc.h" +#include "VirtRegMap.h" #include #include #include @@ -109,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { DomTree = &getAnalysis(); if (!LRCalc) LRCalc = new LiveRangeCalc(); - AllocatableRegs = TRI->getAllocatableSet(fn); - ReservedRegs = TRI->getReservedRegs(fn); // Allocate space for all virtual registers. 
VirtRegIntervals.resize(MRI->getNumVirtRegs()); @@ -147,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; } + OS << "RegMasks:"; + for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i) + OS << ' ' << RegMaskSlots[i]; + OS << '\n'; + printInstrs(OS); } @@ -155,9 +159,11 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { MF->print(OS, Indexes); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } +#endif static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { @@ -382,8 +388,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, /// which a variable is live void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); RegMaskBlocks.resize(MF->getNumBlockIDs()); @@ -440,7 +445,7 @@ void LiveIntervals::computeIntervals() { // Compute the number of register mask instructions in this block. std::pair &RMB = RegMaskBlocks[MBB->getNumber()]; - RMB.second = RegMaskSlots.size() - RMB.first;; + RMB.second = RegMaskSlots.size() - RMB.first; } // Create empty intervals for registers defined by implicit_def's (except @@ -497,7 +502,7 @@ void LiveIntervals::computeRegMasks() { RegMaskBits.push_back(MO->getRegMask()); } // Compute the number of register mask instructions in this block. - RMB.second = RegMaskSlots.size() - RMB.first;; + RMB.second = RegMaskSlots.size() - RMB.first; } } @@ -540,11 +545,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { // Ignore uses of reserved registers. We only track defs of those. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { unsigned Root = *Roots; - if (!isReserved(Root) && !MRI->reg_empty(Root)) + if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) LRCalc->extendToUses(LI, Root); for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { unsigned Reg = *Supers; - if (!isReserved(Reg) && !MRI->reg_empty(Reg)) + if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) LRCalc->extendToUses(LI, Reg); } } @@ -729,17 +734,100 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, return CanSeparate; } +void LiveIntervals::extendToIndices(LiveInterval *LI, + ArrayRef Indices) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) + LRCalc->extend(LI, Indices[i]); +} + +void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, + SmallVectorImpl *EndPoints) { + LiveRangeQuery LRQ(*LI, Kill); + VNInfo *VNI = LRQ.valueOut(); + if (!VNI) + return; + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); + SlotIndex MBBStart, MBBEnd; + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + + // If VNI isn't live out from KillMBB, the value is trivially pruned. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(Kill, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + return; + } + + // VNI is live out of KillMBB. + LI->removeRange(Kill, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + + // Find all blocks that are reachable from KillMBB without leaving VNI's live + // range. It is possible that KillMBB itself is reachable, so start a DFS + // from each successor. 
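// The search below is built on the "external set" variant of
// llvm/ADT/DepthFirstIterator.h: one visited set is shared by the
// per-successor walks, and skipChildren() stops the descent once the value is
// known not to flow further. A stripped-down sketch of that idiom, with the
// live-in test reduced to a caller-supplied placeholder:
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;

static void prunedWalk(MachineBasicBlock *Succ, VisitedTy &Visited,
                       bool (*liveInto)(MachineBasicBlock*)) {
  for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
         I = df_ext_begin(Succ, Visited), E = df_ext_end(Succ, Visited);
       I != E;) {
    if (!liveInto(*I)) {
      I.skipChildren(); // prune: don't visit this block's successors
      continue;
    }
    ++I;
  }
}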
+ typedef SmallPtrSet VisitedTy; + VisitedTy Visited; + for (MachineBasicBlock::succ_iterator + SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); + SuccI != SuccE; ++SuccI) { + for (df_ext_iterator + I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited); + I != E;) { + MachineBasicBlock *MBB = *I; + + // Check if VNI is live in to MBB. + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + LiveRangeQuery LRQ(*LI, MBBStart); + if (LRQ.valueIn() != VNI) { + // This block isn't part of the VNI live range. Prune the search. + I.skipChildren(); + continue; + } + + // Prune the search if VNI is killed in MBB. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(MBBStart, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + I.skipChildren(); + continue; + } + + // VNI is live through MBB. + LI->removeRange(MBBStart, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + ++I; + } + } +} //===----------------------------------------------------------------------===// // Register allocator hooks. // -void LiveIntervals::addKillFlags() { +void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { + // Keep track of regunit ranges. + SmallVector, 8> RU; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; LiveInterval *LI = &getInterval(Reg); + if (LI->empty()) + continue; + + // Find the regunit intervals for the assigned register. They may overlap + // the virtual register live range, cancelling any kills. + RU.clear(); + for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); + ++Units) { + LiveInterval *RUInt = &getRegUnit(*Units); + if (RUInt->empty()) + continue; + RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + } // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; @@ -750,7 +838,32 @@ void LiveIntervals::addKillFlags() { MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) continue; - MI->addRegisterKilled(Reg, NULL); + + // Check if any of the reguints are live beyond the end of RI. That could + // happen when a physreg is defined as a copy of a virtreg: + // + // %EAX = COPY %vreg5 + // FOO %vreg5 <--- MI, cancel kill because %EAX is live. + // BAR %EAX + // + // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. + bool CancelKill = false; + for (unsigned u = 0, e = RU.size(); u != e; ++u) { + LiveInterval *RInt = RU[u].first; + LiveInterval::iterator &I = RU[u].second; + if (I == RInt->end()) + continue; + I = RInt->advanceTo(I, RI->end); + if (I == RInt->end() || I->start >= RI->end) + continue; + // I is overlapping RI. 
+ CancelKill = true; + break; + } + if (CancelKill) + MI->clearRegisterKills(Reg, NULL); + else + MI->addRegisterKilled(Reg, NULL); } } } @@ -900,497 +1013,321 @@ private: LiveIntervals& LIS; const MachineRegisterInfo& MRI; const TargetRegisterInfo& TRI; + SlotIndex OldIdx; SlotIndex NewIdx; - - typedef std::pair IntRangePair; - typedef DenseSet RangeSet; - - struct RegRanges { - LiveRange* Use; - LiveRange* EC; - LiveRange* Dead; - LiveRange* Def; - RegRanges() : Use(0), EC(0), Dead(0), Def(0) {} - }; - typedef DenseMap BundleRanges; + SmallPtrSet Updated; + bool UpdateFlags; public: HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI, - const TargetRegisterInfo& TRI, SlotIndex NewIdx) - : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {} - - // Update intervals for all operands of MI from OldIdx to NewIdx. - // This assumes that MI used to be at OldIdx, and now resides at - // NewIdx. - void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) { - assert(NewIdx != OldIdx && "No-op move? That's a bit strange."); - - // Collect the operands. - RangeSet Entering, Internal, Exiting; - bool hasRegMaskOp = false; - collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - - // To keep the LiveRanges valid within an interval, move the ranges closest - // to the destination first. This prevents ranges from overlapping, to that - // APIs like removeRange still work. - if (NewIdx < OldIdx) { - moveAllEnteringFrom(OldIdx, Entering); - moveAllInternalFrom(OldIdx, Internal); - moveAllExitingFrom(OldIdx, Exiting); - } - else { - moveAllExitingFrom(OldIdx, Exiting); - moveAllInternalFrom(OldIdx, Internal); - moveAllEnteringFrom(OldIdx, Entering); - } - - if (hasRegMaskOp) - updateRegMaskSlots(OldIdx); - -#ifndef NDEBUG - LIValidator validator; - validator = std::for_each(Entering.begin(), Entering.end(), validator); - validator = std::for_each(Internal.begin(), Internal.end(), validator); - validator = std::for_each(Exiting.begin(), Exiting.end(), validator); - assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); -#endif - + const TargetRegisterInfo& TRI, + SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags) + : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx), + UpdateFlags(UpdateFlags) {} + + // FIXME: UpdateFlags is a workaround that creates live intervals for all + // physregs, even those that aren't needed for regalloc, in order to update + // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill + // flags, and postRA passes will use a live register utility instead. + LiveInterval *getRegUnitLI(unsigned Unit) { + if (UpdateFlags) + return &LIS.getRegUnit(Unit); + return LIS.getCachedRegUnit(Unit); } - // Update intervals for all operands of MI to refer to BundleStart's - // SlotIndex. - void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) { - if (MI == BundleStart) - return; // Bundling instr with itself - nothing to do. - - SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI); - assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI && - "SlotIndex <-> Instruction mapping broken for MI"); - - // Collect all ranges already in the bundle. 
- MachineBasicBlock::instr_iterator BII(BundleStart); - RangeSet Entering, Internal, Exiting; - bool hasRegMaskOp = false; - collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) { - if (&*BII == MI) + /// Update all live ranges touched by MI, assuming a move from OldIdx to + /// NewIdx. + void updateAllRanges(MachineInstr *MI) { + DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); + bool hasRegMask = false; + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (MO->isRegMask()) + hasRegMask = true; + if (!MO->isReg()) continue; - collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - } - - BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); - - Entering.clear(); - Internal.clear(); - Exiting.clear(); - collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + // Aggressively clear all kill flags. + // They are reinserted by VirtRegRewriter. + if (MO->isUse()) + MO->setIsKill(false); - DEBUG(dbgs() << "Entering: " << Entering.size() << "\n"); - DEBUG(dbgs() << "Internal: " << Internal.size() << "\n"); - DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n"); - - moveAllEnteringFromInto(OldIdx, Entering, BR); - moveAllInternalFromInto(OldIdx, Internal, BR); - moveAllExitingFromInto(OldIdx, Exiting, BR); - - -#ifndef NDEBUG - LIValidator validator; - validator = std::for_each(Entering.begin(), Entering.end(), validator); - validator = std::for_each(Internal.begin(), Internal.end(), validator); - validator = std::for_each(Exiting.begin(), Exiting.end(), validator); - assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); -#endif - } - -private: - -#ifndef NDEBUG - class LIValidator { - private: - DenseSet Checked, Bogus; - public: - void operator()(const IntRangePair& P) { - const LiveInterval* LI = P.first; - if (Checked.count(LI)) - return; - Checked.insert(LI); - if (LI->empty()) - return; - SlotIndex LastEnd = LI->begin()->start; - for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end(); - LRI != LRE; ++LRI) { - const LiveRange& LR = *LRI; - if (LastEnd > LR.start || LR.start >= LR.end) - Bogus.insert(LI); - LastEnd = LR.end; - } - } - - bool rangesOk() const { - return Bogus.empty(); - } - }; -#endif - - // Collect IntRangePairs for all operands of MI that may need fixing. - // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes' - // maps). - void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal, - RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) { - hasRegMaskOp = false; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& MO = *MOI; - - if (MO.isRegMask()) { - hasRegMaskOp = true; + unsigned Reg = MO->getReg(); + if (!Reg) continue; - } - - if (!MO.isReg() || MO.getReg() == 0) - continue; - - unsigned Reg = MO.getReg(); - - // TODO: Currently we're skipping uses that are reserved or have no - // interval, but we're not updating their kills. This should be - // fixed. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + updateRange(LIS.getInterval(Reg)); continue; - - // Collect ranges for register units. 
These live ranges are computed on - // demand, so just skip any that haven't been computed yet. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) - collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx); - } else { - // Collect ranges for individual virtual registers. - collectRanges(MO, &LIS.getInterval(Reg), - Entering, Internal, Exiting, OldIdx); } + + // For physregs, only update the regunits that actually have a + // precomputed live range. + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = getRegUnitLI(*Units)) + updateRange(*LI); } + if (hasRegMask) + updateRegMaskSlots(); } - void collectRanges(const MachineOperand &MO, LiveInterval *LI, - RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting, - SlotIndex OldIdx) { - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); - assert(LR != 0 && "No live range for def?"); - if (LR->end > OldIdx.getDeadSlot()) - Exiting.insert(std::make_pair(LI, LR)); +private: + /// Update a single live range, assuming an instruction has been moved from + /// OldIdx to NewIdx. + void updateRange(LiveInterval &LI) { + if (!Updated.insert(&LI)) + return; + DEBUG({ + dbgs() << " "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + dbgs() << PrintReg(LI.reg); else - Internal.insert(std::make_pair(LI, LR)); - } + dbgs() << PrintRegUnit(LI.reg, &TRI); + dbgs() << ":\t" << LI << '\n'; + }); + if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) + handleMoveDown(LI); + else + handleMoveUp(LI); + DEBUG(dbgs() << " -->\t" << LI << '\n'); + LI.verify(); } - BundleRanges createBundleRanges(RangeSet& Entering, - RangeSet& Internal, - RangeSet& Exiting) { - BundleRanges BR; - - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) { - LiveInterval* LI = EI->first; - LiveRange* LR = EI->second; - BR[LI->reg].Use = LR; - } + /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Move def to NewIdx, assert endpoint after NewIdx. + /// + /// 2. Live def at OldIdx, killed at NewIdx: + /// Change to dead def at NewIdx. + /// (Happens when bundling def+kill together). + /// + /// 3. Dead def at OldIdx: + /// Move def to NewIdx, possibly across another live value. + /// + /// 4. Def at OldIdx AND at NewIdx: + /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value read at OldIdx, killed before NewIdx: + /// Extend kill to NewIdx. + /// + void handleMoveDown(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) { - LiveInterval* LI = II->first; - LiveRange* LR = II->second; - if (LR->end.isDead()) { - BR[LI->reg].Dead = LR; - } else { - BR[LI->reg].EC = LR; - } + // Handle a live-in value. 
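// A note on the SlotIndex helpers that handleMoveDown/handleMoveUp lean on:
// each instruction owns four ordered slots (Block, EarlyClobber, Register,
// Dead), so isSameInstr()/isEarlierInstr() compare whole instructions while
// getRegSlot()/getDeadSlot() select a slot within one. A toy integer model of
// that ordering, for intuition only (not the real SlotIndex encoding):
enum ToySlot { TS_Block, TS_EarlyClobber, TS_Register, TS_Dead };

static bool toySameInstr(unsigned A, unsigned B)    { return A / 4 == B / 4; }
static bool toyEarlierInstr(unsigned A, unsigned B) { return A / 4 <  B / 4; }
static unsigned toyRegSlot(unsigned A)  { return (A / 4) * 4 + TS_Register; }
static unsigned toyDeadSlot(unsigned A) { return (A / 4) * 4 + TS_Dead; }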
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + // If the live-in value already extends to NewIdx, there is nothing to do. + if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) + return; + // Aggressively remove all kill flags from the old kill point. + // Kill flags shouldn't be used while live intervals exist, they will be + // reinserted by VirtRegRewriter. + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) + for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) + if (MO->isReg() && MO->isUse()) + MO->setIsKill(false); + // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // overlapping ranges. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + // If this was a kill, there may also be a def. Otherwise we're done. + if (!isKill) + return; + ++I; } - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) { - LiveInterval* LI = EI->first; - LiveRange* LR = EI->second; - BR[LI->reg].Def = LR; + // Check for a def at OldIdx. + if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) + return; + // We have a def at OldIdx. + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + // If the defined value extends beyond NewIdx, just move the def down. + // This is case 1 above. + if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { + I->start = DefVNI->def; + return; } - - return BR; - } - - void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) { - MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx); - if (!OldKillMI->killsRegister(reg)) - return; // Bail out if we don't have kill flags on the old register. - MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); - assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); - assert(!NewKillMI->killsRegister(reg) && - "New kill instr is already a kill."); - OldKillMI->clearRegisterKills(reg, &TRI); - NewKillMI->addRegisterKilled(reg, &TRI); - } - - void updateRegMaskSlots(SlotIndex OldIdx) { - SmallVectorImpl::iterator RI = - std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), - OldIdx); - assert(*RI == OldIdx && "No RegMask at OldIdx."); - *RI = NewIdx; - assert(*prior(RI) < *RI && *RI < *next(RI) && - "RegSlots out of order. Did you move one call across another?"); - } - - // Return the last use of reg between NewIdx and OldIdx. - SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { - SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(Reg), - UE = MRI.use_nodbg_end(); - UI != UE; UI.skipInstruction()) { - const MachineInstr* MI = &*UI; - SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); - if (InstSlot > LastUse && InstSlot < OldIdx) - LastUse = InstSlot; + // The remaining possibilities are now: + // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). + // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). + // In either case, it is possible that there is an existing def at NewIdx. + assert((I->end == OldIdx.getDeadSlot() || + SlotIndex::isSameInstr(I->end, NewIdx)) && + "Cannot move def below kill"); + LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { + // There is an existing def at NewIdx, case 4 above. The def at OldIdx is + // coalesced into that value. 
+ assert(NewI->valno != DefVNI && "Multiple defs of value?"); + LI.removeValNo(DefVNI); + return; } - return LastUse; + // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. + // If the def at OldIdx was dead, we allow it to be moved across other LI + // values. The new range should be placed immediately before NewI, move any + // intermediate ranges up. + assert(NewI != I && "Inconsistent iterators"); + std::copy(llvm::next(I), NewI, I); + *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - bool LiveThrough = LR->end > OldIdx.getRegSlot(); - if (LiveThrough) + /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Hoist def to NewIdx. + /// + /// 2. Dead def at OldIdx: + /// Hoist def+end to NewIdx, possibly move across other values. + /// + /// 3. Dead def at OldIdx AND existing def at NewIdx: + /// Remove value defined at OldIdx, coalescing it with existing value. + /// + /// 4. Live def at OldIdx AND existing def at NewIdx: + /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value killed at OldIdx: + /// Hoist kill to NewIdx, then scan for last kill between NewIdx and + /// OldIdx. + /// + void handleMoveUp(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; - SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); - if (LastUse != NewIdx) - moveKillFlags(LI->reg, NewIdx, LastUse); - LR->end = LastUse.getRegSlot(); - } - void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - // Extend the LiveRange if NewIdx is past the end. - if (NewIdx > LR->end) { - // Move kill flags if OldIdx was not originally the end - // (otherwise LR->end points to an invalid slot). - if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { - assert(LR->end > OldIdx && "LiveRange does not cover original slot"); - moveKillFlags(LI->reg, LR->end, NewIdx); + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + // If the live-in value isn't killed here, there is nothing to do. + if (!SlotIndex::isSameInstr(OldIdx, I->end)) + return; + // Adjust I->end to end at NewIdx. If we are hoisting a kill above + // another use, we need to search for that use. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + ++I; + // If OldIdx also defines a value, there couldn't have been another use. + if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { + // No def, search for the new kill. + // This can never be an early clobber kill since there is no def. 
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + return; } - LR->end = NewIdx.getRegSlot(); - } - } - - void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) { - bool GoingUp = NewIdx < OldIdx; - - if (GoingUp) { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringUpFrom(OldIdx, *EI); - } else { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringDownFrom(OldIdx, *EI); } - } - - void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && - LR->end <= OldIdx.getDeadSlot() && - "Range should be internal to OldIdx."); - LiveRange Tmp(*LR); - Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber()); - Tmp.valno->def = Tmp.start; - Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot(); - LI->removeRange(*LR); - LI->addRange(Tmp); - } - - void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) { - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) - moveInternalFrom(OldIdx, *II); - } - - void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveRange* LR = P.second; - assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && - "Range should start in OldIdx."); - assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx."); - SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber()); - LR->start = NewStart; - LR->valno->def = NewStart; - } - - void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) { - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) - moveExitingFrom(OldIdx, *EI); - } - void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - bool LiveThrough = LR->end > OldIdx.getRegSlot(); - if (LiveThrough) { - assert((LR->start < NewIdx || BR[LI->reg].Def == LR) && - "Def in bundle should be def range."); - assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && - "If bundle has use for this reg it should be LR."); - BR[LI->reg].Use = LR; + // Now deal with the def at OldIdx. + assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + + // Check for an existing def at NewIdx. + LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { + assert(NewI->valno != DefVNI && "Same value defined more than once?"); + // There is an existing def at NewIdx. + if (I->end.isDead()) { + // Case 3: Remove the dead def at OldIdx. + LI.removeValNo(DefVNI); + return; + } + // Case 4: Replace def at NewIdx with live def at OldIdx. + I->start = DefVNI->def; + LI.removeValNo(NewI->valno); return; } - SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); - moveKillFlags(LI->reg, OldIdx, LastUse); - - if (LR->start < NewIdx) { - // Becoming a new entering range. - assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 && - "Bundle shouldn't be re-defining reg mid-range."); - assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && - "Bundle shouldn't have different use range for same reg."); - LR->end = LastUse.getRegSlot(); - BR[LI->reg].Use = LR; - } else { - // Becoming a new Dead-def. 
- assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) && - "Live range starting at unexpected slot."); - assert(BR[LI->reg].Def == LR && "Reg should have def range."); - assert(BR[LI->reg].Dead == 0 && - "Can't have def and dead def of same reg in a bundle."); - LR->end = LastUse.getDeadSlot(); - BR[LI->reg].Dead = BR[LI->reg].Def; - BR[LI->reg].Def = 0; - } - } - - void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - if (NewIdx > LR->end) { - // Range extended to bundle. Add to bundle uses. - // Note: Currently adds kill flags to bundle start. - assert(BR[LI->reg].Use == 0 && - "Bundle already has use range for reg."); - moveKillFlags(LI->reg, LR->end, NewIdx); - LR->end = NewIdx.getRegSlot(); - BR[LI->reg].Use = LR; - } else { - assert(BR[LI->reg].Use != 0 && - "Bundle should already have a use range for reg."); - } - } - - void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering, - BundleRanges& BR) { - bool GoingUp = NewIdx < OldIdx; - - if (GoingUp) { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringUpFromInto(OldIdx, *EI, BR); - } else { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringDownFromInto(OldIdx, *EI, BR); + // There is no existing def at NewIdx. Hoist DefVNI. + if (!I->end.isDead()) { + // Leave the end point of a live def. + I->start = DefVNI->def; + return; } - } - void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - // TODO: Sane rules for moving ranges into bundles. + // DefVNI is a dead def. It may have been moved across other values in LI, + // so move I up to NewI. Slide [NewI;I) down one position. + std::copy_backward(NewI, I, llvm::next(I)); + *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal, - BundleRanges& BR) { - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) - moveInternalFromInto(OldIdx, *II, BR); + void updateRegMaskSlots() { + SmallVectorImpl::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((llvm::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && + "Cannot move regmask instruction below another call"); } - void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - - assert(LR->start.isRegister() && - "Don't know how to merge exiting ECs into bundles yet."); + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg) { + SlotIndex LastUse = NewIdx; - if (LR->end > NewIdx.getDeadSlot()) { - // This range is becoming an exiting range on the bundle. - // If there was an old dead-def of this reg, delete it. 
- if (BR[LI->reg].Dead != 0) { - LI->removeRange(*BR[LI->reg].Dead); - BR[LI->reg].Dead = 0; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; } - assert(BR[LI->reg].Def == 0 && - "Can't have two defs for the same variable exiting a bundle."); - LR->start = NewIdx.getRegSlot(); - LR->valno->def = LR->start; - BR[LI->reg].Def = LR; } else { - // This range is becoming internal to the bundle. - assert(LR->end == NewIdx.getRegSlot() && - "Can't bundle def whose kill is before the bundle"); - if (BR[LI->reg].Dead || BR[LI->reg].Def) { - // Already have a def for this. Just delete range. - LI->removeRange(*LR); - } else { - // Make range dead, record. - LR->end = NewIdx.getDeadSlot(); - BR[LI->reg].Dead = LR; - assert(BR[LI->reg].Use == LR && - "Range becoming dead should currently be use."); + MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx); + MachineBasicBlock::iterator MII(MI); + ++MII; + MachineBasicBlock* MBB = MI->getParent(); + for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){ + for (MachineInstr::mop_iterator MOI = MII->operands_begin(), + MOE = MII->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& mop = *MOI; + if (!mop.isReg() || mop.getReg() == 0 || + TargetRegisterInfo::isVirtualRegister(mop.getReg())) + continue; + + if (TRI.hasRegUnit(mop.getReg(), Reg)) + LastUse = LIS.getInstructionIndex(MII); + } } - // In both cases the range is no longer a use on the bundle. - BR[LI->reg].Use = 0; } + return LastUse; } - - void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting, - BundleRanges& BR) { - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) - moveExitingFromInto(OldIdx, *EI, BR); - } - }; -void LiveIntervals::handleMove(MachineInstr* MI) { +void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) { + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); SlotIndex OldIndex = Indexes->getInstructionIndex(MI); Indexes->removeMachineInstrFromMaps(MI); - SlotIndex NewIndex = MI->isInsideBundle() ? 
- Indexes->getInstructionIndex(MI) : - Indexes->insertMachineInstrInMaps(MI); + SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); assert(getMBBStartIdx(MI->getParent()) <= OldIndex && OldIndex < getMBBEndIdx(MI->getParent()) && "Cannot handle moves across basic block boundaries."); - assert(!MI->isBundled() && "Can't handle bundled instructions yet."); - HMEditor HME(*this, *MRI, *TRI, NewIndex); - HME.moveAllRangesFrom(MI, OldIndex); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); } void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, - MachineInstr* BundleStart) { + MachineInstr* BundleStart, + bool UpdateFlags) { + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); - HMEditor HME(*this, *MRI, *TRI, NewIndex); - HME.moveAllRangesInto(MI, BundleStart); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); } diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index cd4e690..4d41fca 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -178,8 +178,8 @@ public: bool checkLoopInterference(MachineLoopRange*); private: - Query(const Query&); // DO NOT IMPLEMENT - void operator=(const Query&); // DO NOT IMPLEMENT + Query(const Query&) LLVM_DELETED_FUNCTION; + void operator=(const Query&) LLVM_DELETED_FUNCTION; }; // Array of LiveIntervalUnions. diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d828f25..c3ff4f1 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,11 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Visit all operands that read Reg. This may include partial defs. for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ++I) { - const MachineOperand &MO = I.getOperand(); + MachineOperand &MO = I.getOperand(); + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervalAnalysis::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); if (!MO.readsReg()) continue; // MI is reading Reg. We may have visited MI before if it happens to be diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b4ce9aa..f8fbc7d 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, // We can't remat physreg uses, unless it is a constant. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) continue; return false; } @@ -96,6 +96,13 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; + + // Don't allow rematerialization immediately after the original def. + // It would be incorrect if OrigMI redefines the register. + // See PR14098. + if (SlotIndex::isSameInstr(OrigIdx, UseIdx)) + return false; + if (OVNI != li.getVNInfoAt(UseIdx)) return false; } @@ -249,7 +256,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, unsigned Reg = MOI->getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) { // Check if MI reads any unreserved physregs. 
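// This hunk is one of several in the series that retire pass-local BitVectors
// (TRI->getReservedRegs(MF), TRI->getAllocatableSet(MF)) in favor of the
// cached queries on MachineRegisterInfo. A sketch of the replacement pattern;
// "canExtendRange" is a hypothetical helper, not code from the patch:
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static bool canExtendRange(const MachineRegisterInfo &MRI, unsigned PhysReg) {
  // was: !ReservedRegs.test(PhysReg) && !AllocatableRegs.test(PhysReg)
  return !MRI.isReserved(PhysReg) && !MRI.isAllocatable(PhysReg);
}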
- if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; continue; } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index cdb1776..7f22478 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveRegMatrix.h" +#include "RegisterCoalescer.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -117,8 +118,9 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg) { if (VirtReg.empty()) return false; + CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units))) + if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) return true; return false; } diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h index b3e2d7f..8f22c24 100644 --- a/lib/CodeGen/LiveRegMatrix.h +++ b/lib/CodeGen/LiveRegMatrix.h @@ -15,7 +15,7 @@ // Register units are defined in MCRegisterInfo.h, they represent the smallest // unit of interference when dealing with overlapping physical registers. The // LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When -// a virtual register is assigned to a physicval register, the live range for +// a virtual register is assigned to a physical register, the live range for // the virtual register is inserted into the LiveIntervalUnion for each regunit // in the physreg. // diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 939e795..f0b522b 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,7 +25,10 @@ using namespace llvm; char LiveStacks::ID = 0; -INITIALIZE_PASS(LiveStacks, "livestacks", +INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveStacks, "livestacks", "Live Stack Slot Analysis", false, false) char &llvm::LiveStacksID = LiveStacks::ID; diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 348ed3a..6ea933d 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -65,6 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { } void LiveVariables::VarInfo::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) @@ -77,6 +78,7 @@ void LiveVariables::VarInfo::dump() const { dbgs() << "\n #" << i << ": " << *Kills[i]; dbgs() << "\n"; } +#endif } /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. 
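// The #if guard added around VarInfo::dump() above recurs throughout these
// files: it keeps dump() out of release binaries unless the build opts back
// in with LLVM_ENABLE_DUMP. A minimal sketch of the idiom on a hypothetical
// class:
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

struct ExampleInfo {
  void print(llvm::raw_ostream &OS) const { OS << "example"; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() const { print(llvm::dbgs()); llvm::dbgs() << '\n'; }
#endif
};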
@@ -501,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); TRI = MF->getTarget().getRegisterInfo(); - ReservedRegisters = TRI->getReservedRegs(mf); - unsigned NumRegs = TRI->getNumRegs(); PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; @@ -586,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = UseRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); - else if (!ReservedRegisters[MOReg]) + else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); } @@ -599,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = DefRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); - else if (!ReservedRegisters[MOReg]) + else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, MI, Defs); } UpdatePhysRegDefs(MI, Defs); @@ -806,18 +806,44 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *SuccBB) { const unsigned NumNew = BB->getNumber(); - // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::iterator BBI = SuccBB->begin(), - BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) + SmallSet Defs, Kills; + + MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); + for (; BBI != BBE && BBI->isPHI(); ++BBI) { + // Record the def of the PHI node. + Defs.insert(BBI->getOperand(0).getReg()); + + // All registers used by PHI nodes in SuccBB must be live through BB. for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); + } + + // Record all vreg defs and kills of all instructions in SuccBB. + for (; BBI != BBE; ++BBI) { + for (MachineInstr::mop_iterator I = BBI->operands_begin(), + E = BBI->operands_end(); I != E; ++I) { + if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) { + if (I->isDef()) + Defs.insert(I->getReg()); + else if (I->isKill()) + Kills.insert(I->getReg()); + } + } + } // Update info for all live variables for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // If the Defs is defined in the successor it can't be live in BB. + if (Defs.count(Reg)) + continue; + + // If the register is either killed in or live through SuccBB it's also live + // through BB. 
VarInfo &VI = getVarInfo(Reg); - if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI)) + if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber())) VI.AliveBlocks.set(NumNew); } } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index fa6b450..18d021d 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -21,7 +21,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Assembly/Writer.h" @@ -145,7 +145,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { instr_iterator I = instr_begin(), E = instr_end(); while (I != E && I->isPHI()) ++I; - assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); + assert((I == E || !I->isInsideBundle()) && + "First non-phi MI cannot be inside a bundle!"); return I; } @@ -156,7 +157,7 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. - assert(!I->isInsideBundle() && + assert((I == E || !I->isInsideBundle()) && "First non-phi / non-label instruction is inside a bundle!"); return I; } @@ -228,9 +229,11 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { return 0; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineBasicBlock::dump() const { print(dbgs()); } +#endif StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) @@ -243,7 +246,7 @@ StringRef MachineBasicBlock::getName() const { std::string MachineBasicBlock::getFullName() const { std::string Name; if (getParent()) - Name = (getParent()->getFunction()->getName() + ":").str(); + Name = (getParent()->getName() + ":").str(); if (getBasicBlock()) Name += getBasicBlock()->getName(); else @@ -942,12 +945,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// getSuccWeight - Return weight of the edge from this block to MBB. /// -uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { +uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { if (Weights.empty()) return 0; - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - return *getWeightIterator(I); + return *getWeightIterator(Succ); } /// getWeightIterator - Return weight iterator corresponding to the I successor @@ -970,6 +972,80 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { return Weights.begin() + index; } +/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed +/// as of just before "MI". +/// +/// Search is localised to a neighborhood of +/// Neighborhood instructions before (searching for defs or kills) and N +/// instructions after (searching just for defs) MI. +MachineBasicBlock::LivenessQueryResult +MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, + unsigned Reg, MachineInstr *MI, + unsigned Neighborhood) { + + unsigned N = Neighborhood; + MachineBasicBlock *MBB = MI->getParent(); + + // Start by searching backwards from MI, looking for kills, reads or defs. + + MachineBasicBlock::iterator I(MI); + // If this is the first insn in the block, don't search backwards.
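// (How a caller is expected to use this query, schematically; the enum values
//  LQR_Live, LQR_OverlappingLive, LQR_Dead and LQR_Unknown are the ones
//  returned below, while the surrounding code and the Neighborhood value of
//  10 are illustrative:
//
//    if (MBB->computeRegisterLiveness(TRI, Reg, MI, 10) ==
//        MachineBasicBlock::LQR_Dead) {
//      // Reg is known dead just before MI; safe to reuse it here.
//    }
//
//  LQR_Unknown means the bounded scan was inconclusive and the caller must be
//  conservative.)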
+ if (I != MBB->begin()) { + do { + --I; + + MachineOperandIteratorBase::PhysRegInfo Analysis = + MIOperands(I).analyzePhysReg(Reg, TRI); + + if (Analysis.Kills) + // Register killed, so isn't live. + return LQR_Dead; + + else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap) + // Defined or read without a previous kill - live. + return (Analysis.Defines || Analysis.Reads) ? + LQR_Live : LQR_OverlappingLive; + + } while (I != MBB->begin() && --N > 0); + } + + // Did we get to the start of the block? + if (I == MBB->begin()) { + // If so, the register's state is definitely defined by the live-in state. + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); + RAI.isValid(); ++RAI) { + if (MBB->isLiveIn(*RAI)) + return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive; + } + + return LQR_Dead; + } + + N = Neighborhood; + + // Try searching forwards from MI, looking for reads or defs. + I = MachineBasicBlock::iterator(MI); + // If this is the last insn in the block, don't search forwards. + if (I != MBB->end()) { + for (++I; I != MBB->end() && N > 0; ++I, --N) { + MachineOperandIteratorBase::PhysRegInfo Analysis = + MIOperands(I).analyzePhysReg(Reg, TRI); + + if (Analysis.ReadsOverlap) + // Used, therefore must have been live. + return (Analysis.Reads) ? + LQR_Live : LQR_OverlappingLive; + + else if (Analysis.DefinesOverlap) + // Defined (but not read) therefore cannot have been live. + return LQR_Dead; + } + } + + // At this point we have no idea of the liveness of the register. + return LQR_Unknown; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index c4dca2c..cd3f199 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -500,11 +500,10 @@ void MachineBlockPlacement::buildChain( assert(BB); assert(BlockToChain[BB] == &Chain); assert(*llvm::prior(Chain.end()) == BB); - MachineBasicBlock *BestSucc = 0; // Look for the best viable successor if there is one to place immediately // after this block. - BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -1014,7 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (F.getFunction()->getFnAttributes(). 
+ hasAttribute(Attributes::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0cc1af0..4479211 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight; } @@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Sum = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight / Scale; } assert(Sum <= UINT32_MAX); return Sum; } -uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; return Weight; } +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { MachineBasicBlock *MaxSucc = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); if (Weight > MaxWeight) { MaxWeight = Weight; MaxSucc = *I; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 896461f..dbc41de 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -63,8 +63,6 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); - AllocatableRegs.clear(); - ReservedRegs.clear(); } private: @@ -78,8 +76,6 @@ namespace { ScopedHTType VNT; SmallVector Exps; unsigned CurrVN; - BitVector AllocatableRegs; - BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, @@ -88,7 +84,8 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs) const; + SmallVector &PhysDefs, + bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet &PhysRefs, SmallVector &PhysDefs, @@ -198,31 +195,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs) const{ - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + SmallVector &PhysDefs, + bool &PhysUseDef) const{ + // First, add all uses to PhysRefs. 
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - // If the def is dead, it's ok. But the def may not marked "dead". That's - // common since this pass is run before livevariables. We can scan - // forward a few instructions and check if it is obviously dead. - if (MO.isDef() && - (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) - continue; // Reading constant physregs is ok. if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); - if (MO.isDef()) + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. + PhysUseDef = false; + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Check against PhysRefs even if the def is "dead". + if (PhysRefs.count(Reg)) + PhysUseDef = true; + // If the def is dead, it's ok. But the def may not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) PhysDefs.push_back(Reg); } + // Finally, add all defs to PhysRefs as well. + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) + for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + return !PhysRefs.empty(); } @@ -242,7 +260,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) // Avoid extending live range of physical registers if they are //allocatable or reserved. return false; @@ -411,8 +429,8 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); DenseMap::iterator SI = ScopeMap.find(MBB); assert(SI != ScopeMap.end()); - ScopeMap.erase(SI); delete SI->second; + ScopeMap.erase(SI); } bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { @@ -463,16 +481,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool CrossMBBPhysDef = false; SmallSet PhysRefs; SmallVector PhysDefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { FoundCSE = false; // ... Unless the CS is local or is in the sole predecessor block // and it also defines the physical register which is not clobbered // in between and the physical register uses were not clobbered. - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) - FoundCSE = true; + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. 
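// (Schematically, with a hypothetical opcode, the rejected shape is:
//
//    %EAX<def> = FOO %EAX<use>   ; one instruction both reads and writes %EAX
//
//  Matching a later, textually identical FOO against this one could reuse a
//  value of %EAX that the first FOO already clobbered, so such candidates are
//  rejected outright.)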
+ if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } } if (!FoundCSE) { @@ -635,7 +659,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis(); DT = &getAnalysis(); - AllocatableRegs = TRI->getAllocatableSet(MF); - ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index bac3aa2..4a79328 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -16,6 +16,7 @@ #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { class MachineCopyPropagation : public MachineFunctionPass { const TargetRegisterInfo *TRI; - BitVector ReservedRegs; + MachineRegisterInfo *MRI; public: static char ID; // Pass identification, replacement for typeid @@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DenseMap::iterator CI = AvailCopyMap.find(Src); if (CI != AvailCopyMap.end()) { MachineInstr *CopyMI = CI->second; - if (!ReservedRegs.test(Def) && - (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && + if (!MRI->isReserved(Def) && + (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) && isNopCopy(CopyMI, Def, Src, TRI)) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. 
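The comment above breaks off at "e.g."; the situation it describes is a pair of copies that cancel out. Schematically (an illustration consistent with the check above, not text from the patch):

//   %ECX = COPY %EAX
//   ...               ; nothing redefines %EAX or %ECX in between
//   %EAX = COPY %ECX  ; nop copy: %EAX already holds this value
//
// The second COPY can be erased, unless either register is reserved:
// reserved registers may change behind the pass's back, hence the
// MRI->isReserved() tests that replace the old ReservedRegs bitvector.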
@@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { unsigned Reg = (*DI)->getOperand(0).getReg(); - if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg)) + if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) continue; (*DI)->eraseFromParent(); Changed = true; @@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (SmallSetVector::iterator DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { - if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; @@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; TRI = MF.getTarget().getRegisterInfo(); - ReservedRegs = TRI->getReservedRegs(MF); + MRI = &MF.getRegInfo(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Changed |= CopyPropagateBlock(*I); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d4aede8a..91d5211 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -28,7 +28,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetFrameLowering.h" @@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, RegInfo = 0; MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); - if (Fn->hasFnAttr(Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs( - Fn->getAttributes().getFnAttributes())); - ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); + if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getAttributes(). + getFnAttributes().getStackAlignment()); + ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->hasFnAttr(Attribute::OptimizeForSize)) + if (!Fn->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; @@ -284,12 +284,19 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFunction::dump() const { print(dbgs()); } +#endif + +StringRef MachineFunction::getName() const { + assert(getFunction() && "No function!"); + return getFunction()->getName(); +} void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ": "; + OS << "# Machine code for function " << getName() << ": "; if (RegInfo) { OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); if (!RegInfo->tracksLiveness()) @@ -334,7 +341,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { BB->print(OS, Indexes); } - OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; + OS << "\n# End machine code for function " << getName() << ".\n\n"; } namespace llvm { @@ -344,7 +351,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName().str() + "' function"; + return "CFG for '" + F->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -377,7 +384,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -387,7 +394,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -453,7 +460,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/false, false)); + /*isSS*/ false, + /*NeedSP*/ false, + /*Alloca*/ 0)); return -++NumFixedObjects; } @@ -525,16 +534,18 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFrameInfo::dump(const MachineFunction &MF) const { print(MF, dbgs()); } +#endif //===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// /// getEntrySize - Return the size of each entry in the jump table. -unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { +unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { // The size of a jump table entry is 4 bytes unless the entry is just the // address of a block, in which case it is the pointer size. switch (getEntryKind()) { @@ -553,7 +564,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { } /// getEntryAlignment - Return the alignment of each entry in the jump table. -unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { +unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // The alignment of a jump table entry is the alignment of int32 unless the // entry is just the address of a block, in which case it is the pointer // alignment. @@ -622,7 +633,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << '\n'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineJumpTableInfo::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// @@ -657,7 +670,7 @@ MachineConstantPool::~MachineConstantPool() { /// CanShareConstantPoolEntry - Test whether the given two constants /// can be allocated the same constant pool entry. 
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, - const TargetData *TD) { + const DataLayout *TD) { // Handle the trivial case quickly. if (A == B) return true; @@ -681,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // Try constant folding a bitcast of both instructions to an integer. If we // get two identical ConstantInt's, then we are good to share them. We use // the constant folding APIs to do this so that we get the benefit of - // TargetData. + // DataLayout. if (isa(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, const_cast(A), TD); @@ -749,10 +762,12 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(Value*)Constants[i].Val.ConstVal; + OS << *(const Value*)Constants[i].Val.ConstVal; OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineConstantPool::dump() const { print(dbgs()); } +#endif diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0102ac7..ed94efb 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -51,7 +51,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { char MachineFunctionPrinterPass::ID = 0; } -char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; +char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", "Machine Function Printer", false, false) diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b166849..ce8d520 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -111,6 +111,7 @@ void MachineOperand::setIsDef(bool Val) { /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. if (isReg() && isOnRegUseList()) @@ -136,7 +137,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, RegInfo = &MF->getRegInfo(); // If this operand is already a register operand, remove it from the // register's use/def lists. - if (RegInfo && isReg()) + bool WasReg = isReg(); + if (RegInfo && WasReg) RegInfo->removeRegOperandFromUseList(this); // Change this to a register and set the reg#. @@ -153,6 +155,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsDebug = isDebug; // Ensure isOnRegUseList() returns false. Contents.Reg.Prev = 0; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; // If this operand is embedded in a function, add the operand to the // register's use/def list. 
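CanShareConstantPoolEntry, whose signature changes above, lets two constants of different types share one pool slot when folding both to an integer yields identical bits, now sizing them through DataLayout. A rough host-side analogue of the underlying idea (assumed semantics, not the LLVM implementation; relies on IEEE-754 floats):

#include <cstdint>
#include <cstring>

// Two constants may share a pool entry if they have the same stored size
// and identical bit patterns, whatever their nominal types.
template <typename A, typename B>
bool canSharePoolEntry(const A &X, const B &Y) {
  if (sizeof(A) != sizeof(B))
    return false;
  return std::memcmp(&X, &Y, sizeof(A)) == 0;
}

// Example: float 1.0f shares an entry with its bit pattern 0x3f800000,
// so canSharePoolEntry(1.0f, uint32_t(0x3f800000)) is true.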
@@ -193,7 +198,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return !strcmp(getSymbolName(), Other.getSymbolName()) && getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: - return getBlockAddress() == Other.getBlockAddress(); + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); case MO_RegisterMask: return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: @@ -208,8 +214,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { hash_code llvm::hash_value(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), - MO.getSubReg(), MO.isDef()); + // Register operands don't have target flags. + return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); case MachineOperand::MO_Immediate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); case MachineOperand::MO_CImmediate: @@ -234,7 +240,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { MO.getOffset()); case MachineOperand::MO_BlockAddress: return hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getBlockAddress()); + MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_Metadata: @@ -262,7 +268,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isInternalRead() || isEarlyClobber()) { + isInternalRead() || isEarlyClobber() || isTied()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -282,27 +288,32 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) { + if (isKill()) { if (NeedComma) OS << ','; - NeedComma = false; - if (isKill()) { - OS << "kill"; - NeedComma = true; - } - if (isDead()) { - OS << "dead"; - NeedComma = true; - } - if (isUndef() && isUse()) { - if (NeedComma) OS << ','; - OS << "undef"; - NeedComma = true; - } - if (isInternalRead()) { - if (NeedComma) OS << ','; - OS << "internal"; - NeedComma = true; - } + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + if (NeedComma) OS << ','; + OS << "dead"; + NeedComma = true; + } + if (isUndef() && isUse()) { + if (NeedComma) OS << ','; + OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; + } + if (isTied()) { + if (NeedComma) OS << ','; + OS << "tied"; + if (TiedTo != 15) + OS << unsigned(TiedTo - 1); + NeedComma = true; } OS << '>'; } @@ -352,6 +363,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_BlockAddress: OS << '<'; WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); + if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; case MachineOperand::MO_RegisterMask: @@ -528,20 +540,6 @@ void MachineInstr::addImplicitDefUseOperands() { /// MachineInstr ctor - This constructor creates a MachineInstr and adds the /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. 
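The hash_value change above drops target flags from register-operand hashes because isIdenticalTo never compares them for registers, and a hash function must not distinguish values that compare equal. The invariant in miniature, with a toy operand type:

#include <cstddef>
#include <functional>

struct RegOperand {
  unsigned Reg, SubReg;
  bool IsDef;
  unsigned TargetFlags;  // ignored by equality, so it must be ignored by hash
};

bool operator==(const RegOperand &A, const RegOperand &B) {
  return A.Reg == B.Reg && A.SubReg == B.SubReg && A.IsDef == B.IsDef;
}

std::size_t hashValue(const RegOperand &O) {
  // Combine only the fields that equality looks at.
  std::size_t H = std::hash<unsigned>()(O.Reg);
  H = H * 31 + std::hash<unsigned>()(O.SubReg);
  H = H * 31 + (O.IsDef ? 1 : 0);
  return H;
}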
-MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) - : MCID(&tid), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), Parent(0) { - unsigned NumImplicitOps = 0; - if (!NoImp) - NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + MCID->getNumOperands()); - if (!NoImp) - addImplicitDefUseOperands(); - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); -} - -/// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), @@ -559,21 +557,6 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. -MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) - : MCID(&tid), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), Parent(0) { - assert(MBB && "Cannot use inserting ctor with null basic block!"); - unsigned NumImplicitOps = - MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + MCID->getNumOperands()); - addImplicitDefUseOperands(); - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); - MBB->push_back(this); // Add instruction to end of basic block! -} - -/// MachineInstr ctor - As above, but with a DebugLoc. -/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), @@ -673,6 +656,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (!isImpReg && !isInlineAsm()) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; + assert(!Operands[OpNo].isTied() && "Cannot move tied operands"); if (RegInfo) RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } @@ -708,12 +692,25 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. Operands[OpNo].Contents.Reg.Prev = 0; + // Ignore existing ties. This is not a property that can be copied. + Operands[OpNo].TiedTo = 0; // Add the new operand to RegInfo. if (RegInfo) RegInfo->addRegOperandToUseList(&Operands[OpNo]); - // If the register operand is flagged as early, mark the operand as such. - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); + // The MCID operand information isn't accurate until we start adding + // explicit operands. The implicit operands are added first, then the + // explicits are inserted before them. + if (!isImpReg) { + // Tie uses to defs as indicated in MCInstrDesc. + if (Operands[OpNo].isUse()) { + int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); + if (DefIdx != -1) + tieOperands(DefIdx, OpNo); + } + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } } // Re-add all the implicit ops. 
@@ -730,6 +727,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + untieRegOperand(OpNo); MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. @@ -752,6 +750,13 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } +#ifndef NDEBUG + // Moving tied operands would break the ties. + for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) + if (Operands[i].isReg()) + assert(!Operands[i].isTied() && "Cannot move tied operands"); +#endif + Operands.erase(Operands.begin()+OpNo); if (RegInfo) { @@ -935,6 +940,12 @@ bool MachineInstr::isStackAligningInlineAsm() const { return false; } +InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const { + assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!"); + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0); +} + int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, unsigned *GroupNo) const { assert(isInlineAsm() && "Expected an inline asm instruction"); @@ -1004,9 +1015,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, unsigned MachineInstr::getBundleSize() const { assert(isBundle() && "Expecting a bundle"); - MachineBasicBlock::const_instr_iterator I = *this; + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end(); unsigned Size = 0; - while ((++I)->isInsideBundle()) { + while ((++I != E) && I->isInsideBundle()) { ++Size; } assert(Size > 1 && "Malformed bundle"); @@ -1114,107 +1126,99 @@ int MachineInstr::findFirstPredOperandIdx() const { return -1; } -/// isRegTiedToUseOperand - Given the index of a register def operand, -/// check if the register def is tied to a source operand, due to either -/// two-address elimination or inline assembly constraints. Returns the -/// first tied use operand index by reference is UseOpIdx is not null. -bool MachineInstr:: -isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { - if (isInlineAsm()) { - assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); - const MachineOperand &MO = getOperand(DefOpIdx); - if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) - return false; - // Determine the actual operand index that corresponds to this index. - unsigned DefNo = 0; - int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo); - if (FlagIdx < 0) - return false; +// MachineOperand::TiedTo is 4 bits wide. +const unsigned TiedMax = 15; - // Which part of the group is DefOpIdx? - unsigned DefPart = DefOpIdx - (FlagIdx + 1); - - for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); - i != e; ++i) { - const MachineOperand &FMO = getOperand(i); - if (!FMO.isImm()) - continue; - if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) - continue; - unsigned Idx; - if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && - Idx == DefNo) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i + 1 + DefPart; - return true; - } - } - return false; +/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other. +/// +/// Use and def operands can be tied together, indicated by a non-zero TiedTo +/// field. TiedTo can have these values: +/// +/// 0: Operand is not tied to anything. +/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1). +/// TiedMax: Tied to an operand >= TiedMax-1. 
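Under the encoding documented above, a tie fits in the 4-bit TiedTo field: 0 means untied, 1 through TiedMax-1 store the partner index plus one, and TiedMax is a sentinel for partners at index TiedMax-1 or beyond that must be searched for. A sketch of encode and decode under exactly those rules (helper names are invented):

#include <cassert>

const unsigned TiedMax = 15;  // a 4-bit field holds 0..15

// Store a partner operand index into the 4-bit field.
unsigned encodeTiedTo(unsigned PartnerIdx) {
  return PartnerIdx + 1 < TiedMax ? PartnerIdx + 1 : TiedMax;
}

// Decode: returns true and sets PartnerIdx when the field stores the
// partner directly; false means the sentinel, and the caller must fall
// back to a search such as findTiedOperandIdx.
bool decodeTiedTo(unsigned TiedTo, unsigned &PartnerIdx) {
  assert(TiedTo != 0 && "operand is not tied");
  if (TiedTo < TiedMax) {
    PartnerIdx = TiedTo - 1;
    return true;
  }
  return false;
}

Keeping the field this small lets MachineOperand stay compact while still covering the common case, where tied defs sit among the first operands.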
+/// +/// The tied def must be one of the first TiedMax operands on a normal +/// instruction. INLINEASM instructions allow more tied defs. +/// +void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) { + MachineOperand &DefMO = getOperand(DefIdx); + MachineOperand &UseMO = getOperand(UseIdx); + assert(DefMO.isDef() && "DefIdx must be a def operand"); + assert(UseMO.isUse() && "UseIdx must be a use operand"); + assert(!DefMO.isTied() && "Def is already tied to another use"); + assert(!UseMO.isTied() && "Use is already tied to another def"); + + if (DefIdx < TiedMax) + UseMO.TiedTo = DefIdx + 1; + else { + // Inline asm can use the group descriptors to find tied operands, but on + // normal instruction, the tied def must be within the first TiedMax + // operands. + assert(isInlineAsm() && "DefIdx out of range"); + UseMO.TiedTo = TiedMax; } - assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); - const MCInstrDesc &MCID = getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.isUse() && - MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i; - return true; - } - } - return false; + // UseIdx can be out of range, we'll search for it in findTiedOperandIdx(). + DefMO.TiedTo = std::min(UseIdx + 1, TiedMax); } -/// isRegTiedToDefOperand - Return true if the operand of the specified index -/// is a register use and it is tied to an def operand. It also returns the def -/// operand index by reference. -bool MachineInstr:: -isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { - if (isInlineAsm()) { - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) - return false; +/// Given the index of a tied register operand, find the operand it is tied to. +/// Defs are tied to uses and vice versa. Returns the index of the tied operand +/// which must exist. +unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isTied() && "Operand isn't tied"); - // Find the flag operand corresponding to UseOpIdx - int FlagIdx = findInlineAsmFlagIdx(UseOpIdx); - if (FlagIdx < 0) - return false; + // Normally TiedTo is in range. + if (MO.TiedTo < TiedMax) + return MO.TiedTo - 1; - const MachineOperand &UFMO = getOperand(FlagIdx); - unsigned DefNo; - if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { - if (!DefOpIdx) - return true; - - unsigned DefIdx = InlineAsm::MIOp_FirstOperand; - // Remember to adjust the index. First operand is asm string, second is - // the HasSideEffects and AlignStack bits, then there is a flag for each. - while (DefNo) { - const MachineOperand &FMO = getOperand(DefIdx); - assert(FMO.isImm()); - // Skip over this def. - DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; - --DefNo; - } - *DefOpIdx = DefIdx + UseOpIdx - FlagIdx; - return true; + // Uses on normal instructions can be out of range. + if (!isInlineAsm()) { + // Normal tied defs must be in the 0..TiedMax-1 range. + if (MO.isUse()) + return TiedMax - 1; + // MO is a def. Search for the tied use. 
+ for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) { + const MachineOperand &UseMO = getOperand(i); + if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1) + return i; } - return false; + llvm_unreachable("Can't find tied use"); } - const MCInstrDesc &MCID = getDesc(); - if (UseOpIdx >= MCID.getNumOperands()) - return false; - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse()) - return false; - int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO); - if (DefIdx == -1) - return false; - if (DefOpIdx) - *DefOpIdx = (unsigned)DefIdx; - return true; + // Now deal with inline asm by parsing the operand group descriptor flags. + // Find the beginning of each operand group. + SmallVector GroupIdx; + unsigned OpIdxGroup = ~0u; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + assert(FlagMO.isImm() && "Invalid tied operand on inline asm"); + unsigned CurGroup = GroupIdx.size(); + GroupIdx.push_back(i); + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + // OpIdx belongs to this operand group. + if (OpIdx > i && OpIdx < i + NumOps) + OpIdxGroup = CurGroup; + unsigned TiedGroup; + if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup)) + continue; + // Operands in this group are tied to operands in TiedGroup which must be + // earlier. Find the number of operands between the two groups. + unsigned Delta = i - GroupIdx[TiedGroup]; + + // OpIdx is a use tied to TiedGroup. + if (OpIdxGroup == CurGroup) + return OpIdx - Delta; + + // OpIdx is a def tied to this use group. + if (OpIdxGroup == TiedGroup) + return OpIdx + Delta; + } + llvm_unreachable("Invalid tied operand on inline asm"); } /// clearKillInfo - Clears kill flags on all operands. @@ -1292,7 +1296,12 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (mayStore() || isCall()) { + // + // Treat volatile loads as stores. This is not strictly necessary for + // volatiles, but it is required for atomic loads. It is not allowed to move + // a load across an atomic load with Ordering > Monotonic. + if (mayStore() || isCall() || + (mayLoad() && hasOrderedMemoryRef())) { SawStore = true; return false; } @@ -1308,8 +1317,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // load. if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the load is volatile, we can't move it. - return !SawStore && !hasVolatileMemoryRef(); + // end of block, we can't move it. + return !SawStore; return true; } @@ -1340,11 +1349,11 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, return true; } -/// hasVolatileMemoryRef - Return true if this instruction may have a -/// volatile memory reference, or if the information describing the -/// memory reference is not available. Return false if it is known to -/// have no volatile memory references. -bool MachineInstr::hasVolatileMemoryRef() const { +/// hasOrderedMemoryRef - Return true if this instruction may have an ordered +/// or volatile memory reference, or if the information describing the memory +/// reference is not available. Return false if it is known to have no ordered +/// memory references. 
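On INLINEASM, findTiedOperandIdx above cannot rely on the 4-bit field alone; it walks the operand group descriptors, where each group is one flag immediate followed by its register operands, and a tied use group records the number of its def group. The bookkeeping, reduced to a toy descriptor list (not the real InlineAsm flag encoding):

#include <vector>

struct Group { unsigned NumRegs; int TiedToGroup; };  // -1 = untied

// Map each operand group to the first operand index it covers, mirroring
// the GroupIdx vector built in findTiedOperandIdx.
std::vector<unsigned> groupStarts(const std::vector<Group> &Groups,
                                  unsigned FirstOperand) {
  std::vector<unsigned> Starts;
  unsigned Idx = FirstOperand;
  for (const Group &G : Groups) {
    Starts.push_back(Idx);
    Idx += 1 + G.NumRegs;  // the flag word plus its register operands
  }
  return Starts;
}

// An operand at offset k inside the use group is tied to the operand at
// the same offset k inside the def group: shift by the group distance.
unsigned tiedPartner(const std::vector<unsigned> &Starts,
                     unsigned UseGroup, unsigned DefGroup, unsigned OpIdx) {
  return OpIdx - (Starts[UseGroup] - Starts[DefGroup]);
}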
+bool MachineInstr::hasOrderedMemoryRef() const { // An instruction known never to access memory won't have a volatile access. if (!mayStore() && !mayLoad() && @@ -1357,9 +1366,9 @@ bool MachineInstr::hasVolatileMemoryRef() const { if (memoperands_empty()) return true; - // Check the memory reference information for volatile references. + // Check the memory reference information for ordered references. for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) - if ((*I)->isVolatile()) + if (!(*I)->isUnordered()) return true; return false; @@ -1461,7 +1470,9 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) { } void MachineInstr::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " " << *this; +#endif } static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, @@ -1540,6 +1551,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " [sideeffect]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; + if (getInlineAsmDialect() == InlineAsm::AD_ATT) + OS << " [attdialect]"; + if (getInlineAsmDialect() == InlineAsm::AD_Intel) + OS << " [inteldialect]"; StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; FirstOp = false; diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index b7de7bf..1f7fbfc 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -109,10 +109,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); - SmallVector LocalDefs; - SmallSet LocalDefSet; + SmallVector LocalDefs; + SmallSet LocalDefSet; SmallSet DeadDefSet; - SmallSet KilledDefSet; + SmallSet KilledDefSet; SmallVector ExternUses; SmallSet ExternUseSet; SmallSet KilledUseSet; @@ -181,7 +181,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, Defs.clear(); } - SmallSet Added; + SmallSet Added; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; if (Added.insert(Reg)) { @@ -248,10 +248,10 @@ bool llvm::finalizeBundles(MachineFunction &MF) { // MachineOperand iterator //===----------------------------------------------------------------------===// -MachineOperandIteratorBase::RegInfo +MachineOperandIteratorBase::VirtRegInfo MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, SmallVectorImpl > *Ops) { - RegInfo RI = { false, false, false }; + VirtRegInfo RI = { false, false, false }; for(; isValid(); ++*this) { MachineOperand &MO = deref(); if (!MO.isReg() || MO.getReg() != Reg) @@ -276,3 +276,53 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, } return RI; } + +MachineOperandIteratorBase::PhysRegInfo +MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, + const TargetRegisterInfo *TRI) { + bool AllDefsDead = true; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; + + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "analyzePhysReg not given a physical register!"); + for (; isValid(); ++*this) { + MachineOperand &MO = deref(); + + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + PRI.Clobbers = true; // Regmask clobbers Reg. 
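hasOrderedMemoryRef, introduced above, generalizes the old volatile-only test: any memory operand that is not unordered (volatile, or atomic stronger than monotonic) now pins the instruction, and an instruction without operand info is assumed ordered. The shape of that conservative test, with stand-in types in place of MachineMemOperand:

#include <vector>

struct MemOperand { bool Volatile; bool AtomicAboveMonotonic; };

bool isUnordered(const MemOperand &MMO) {
  return !MMO.Volatile && !MMO.AtomicAboveMonotonic;
}

// Conservative: missing operand info means "assume ordered".
bool hasOrderedMemRef(const std::vector<MemOperand> &MMOs,
                      bool AccessesMemory) {
  if (!AccessesMemory)
    return false;  // provably no memory access at all
  if (MMOs.empty())
    return true;   // unknown reference: assume the worst
  for (const MemOperand &MMO : MMOs)
    if (!isUnordered(MMO))
      return true;
  return false;
}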
+ + if (!MO.isReg()) + continue; + + unsigned MOReg = MO.getReg(); + if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + continue; + + bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); + bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); + + if (IsRegOrSuperReg && MO.readsReg()) { + // Reg or a super-reg is read, and perhaps killed also. + PRI.Reads = true; + PRI.Kills = MO.isKill(); + } if (IsRegOrOverlapping && MO.readsReg()) { + PRI.ReadsOverlap = true;// Reg or an overlapping register is read. + } + + if (!MO.isDef()) + continue; + + if (IsRegOrSuperReg) { + PRI.Defines = true; // Reg or a super-register is defined. + if (!MO.isDead()) + AllDefsDead = false; + } + if (IsRegOrOverlapping) + PRI.Clobbers = true; // Reg or an overlapping reg is defined. + } + + if (AllDefsDead && PRI.Defines) + PRI.DefinesDead = true; // Reg or super-register was defined and was dead. + + return PRI; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index efec481..169443e 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -334,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); + DEBUG(dbgs() << MF.getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 9f3829e..27afeec 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -74,6 +74,8 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineLoop::dump() const { print(dbgs()); } +#endif diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index ea98b23..005bf78 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; using namespace llvm::dwarf; -// Handle the Pass registration stuff necessary to use TargetData's. +// Handle the Pass registration stuff necessary to use DataLayout's. INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 5ab56c0..a1c7e9f 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -21,8 +21,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// // Out of line virtual method. -void MachineModuleInfoMachO::Anchor() {} -void MachineModuleInfoELF::Anchor() {} +void MachineModuleInfoMachO::anchor() {} +void MachineModuleInfoELF::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair PairTy; diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp new file mode 100644 index 0000000..c3f6e92 --- /dev/null +++ b/lib/CodeGen/MachinePostDominators.cpp @@ -0,0 +1,55 @@ +//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+// declare initializeMachinePostDominatorTreePass
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+                "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+  initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+  DT = new DominatorTreeBase<MachineBasicBlock>(true); // true indicates a
+                                                       // postdominator tree
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+  return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+  DT->recalculate(F);
+  return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+  delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+  DT->print(OS);
+}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 5fb938f..95d7a7d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
   : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
-  UsedPhysRegs.resize(TRI.getNumRegs());
+  UsedRegUnits.resize(TRI.getNumRegUnits());
   UsedPhysRegMask.resize(TRI.getNumRegs());

   // Create the physreg use/def lists.
@@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
 MachineRegisterInfo::~MachineRegisterInfo() {
 #ifndef NDEBUG
   clearVirtRegs();
-  for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+  for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
     assert(!PhysRegUseDefLists[i] &&
            "PhysRegUseDefLists has entries after all instructions are deleted");
 #endif
@@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {

 void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
   ReservedRegs = TRI->getReservedRegs(MF);
+  assert(ReservedRegs.size() == TRI->getNumRegs() &&
+         "Invalid ReservedRegs vector from target");
 }

 bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
                                             const MachineFunction &MF) const {
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));

-  // Check if any overlapping register is modified.
+  // Check if any overlapping register is modified, or allocatable so it may be
+  // used later.
   for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
-    if (!def_empty(*AI))
-      return false;
-
-  // Check if any overlapping register is allocatable so it may be used later.
- if (AllocatableRegs.empty()) - AllocatableRegs = TRI->getAllocatableSet(MF); - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - if (AllocatableRegs.test(*AI)) + if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a1dc948..a4817d0 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -18,11 +18,8 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -35,10 +32,12 @@ using namespace llvm; -static cl::opt ForceTopDown("misched-topdown", cl::Hidden, - cl::desc("Force top-down list scheduling")); -static cl::opt ForceBottomUp("misched-bottomup", cl::Hidden, - cl::desc("Force bottom-up list scheduling")); +namespace llvm { +cl::opt ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +cl::opt ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); +} #ifndef NDEBUG static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, @@ -50,6 +49,15 @@ static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG +// Threshold to very roughly model an out-of-order processor's instruction +// buffers. If the actual value of this threshold matters much in practice, then +// it can be specified by the machine model. For now, it's an experimental +// tuning knob to determine when and if it matters. +static cl::opt ILPWindow("ilp-window", cl::Hidden, + cl::desc("Allow expected latency to exceed the critical path by N cycles " + "before attempting to balance ILP"), + cl::init(10U)); + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -221,7 +229,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions. So the local iterators 'I' and // 'RegionEnd' are invalid across these calls. - unsigned RemainingCount = MBB->size(); + unsigned RemainingInstrs = MBB->size(); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { @@ -230,19 +238,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { --RegionEnd; // Count the boundary instruction. - --RemainingCount; + --RemainingInstrs; } // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingCount) { + for(;I != MBB->begin(); --I, --RemainingInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. 
- Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -252,11 +260,11 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { continue; } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getFunction()->getName() + DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingCount << "\n"); + dbgs() << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -269,7 +277,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // scheduler for the top of it's scheduled region. RegionEnd = Scheduler->begin(); } - assert(RemainingCount == 0 && "Instruction count mismatch!"); + assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); @@ -281,157 +289,20 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { // unimplemented } -//===----------------------------------------------------------------------===// -// MachineSchedStrategy - Interface to a machine scheduling algorithm. -//===----------------------------------------------------------------------===// - -namespace { -class ScheduleDAGMI; - -/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected -/// scheduling algorithm. -/// -/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it -/// in ScheduleDAGInstrs.h -class MachineSchedStrategy { -public: - virtual ~MachineSchedStrategy() {} - - /// Initialize the strategy after building the DAG for a new region. - virtual void initialize(ScheduleDAGMI *DAG) = 0; - - /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to - /// schedule the node at the top of the unscheduled region. Otherwise it will - /// be scheduled at the bottom. - virtual SUnit *pickNode(bool &IsTopNode) = 0; - - /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node. - virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; - - /// When all predecessor dependencies have been resolved, free this node for - /// top-down scheduling. - virtual void releaseTopNode(SUnit *SU) = 0; - /// When all successor dependencies have been resolved, free this node for - /// bottom-up scheduling. - virtual void releaseBottomNode(SUnit *SU) = 0; -}; -} // namespace +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ReadyQueue::dump() { + dbgs() << Name << ": "; + for (unsigned i = 0, e = Queue.size(); i < e; ++i) + dbgs() << Queue[i]->NodeNum << " "; + dbgs() << "\n"; +} +#endif //===----------------------------------------------------------------------===// // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals // preservation. //===----------------------------------------------------------------------===// -namespace { -/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules -/// machine instructions while updating LiveIntervals. 
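The loop revised above (RemainingCount becoming RemainingInstrs) cuts each block into scheduling regions bottom-up, treating boundary instructions as separators that are counted but never scheduled. The traversal pattern on a toy instruction list:

#include <utility>
#include <vector>

struct Instr { bool IsBoundary; };

// Split [0, N) bottom-up into half-open regions containing no boundaries;
// the boundaries themselves separate the regions.
std::vector<std::pair<unsigned, unsigned> >
splitRegions(const std::vector<Instr> &Block) {
  std::vector<std::pair<unsigned, unsigned> > Regions;
  unsigned End = Block.size();
  while (End > 0) {
    // Skip boundary instructions; they are never scheduled.
    while (End > 0 && Block[End - 1].IsBoundary)
      --End;
    unsigned Begin = End;
    while (Begin > 0 && !Block[Begin - 1].IsBoundary)
      --Begin;
    if (End > Begin)
      Regions.push_back(std::make_pair(Begin, End));
    End = Begin;
  }
  return Regions;  // bottom-up order, matching the pass
}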
-class ScheduleDAGMI : public ScheduleDAGInstrs { - AliasAnalysis *AA; - RegisterClassInfo *RegClassInfo; - MachineSchedStrategy *SchedImpl; - - MachineBasicBlock::iterator LiveRegionEnd; - - /// Register pressure in this region computed by buildSchedGraph. - IntervalPressure RegPressure; - RegPressureTracker RPTracker; - - /// List of pressure sets that exceed the target's pressure limit before - /// scheduling, listed in increasing set ID order. Each pressure set is paired - /// with its max pressure in the currently scheduled regions. - std::vector RegionCriticalPSets; - - /// The top of the unscheduled zone. - MachineBasicBlock::iterator CurrentTop; - IntervalPressure TopPressure; - RegPressureTracker TopRPTracker; - - /// The bottom of the unscheduled zone. - MachineBasicBlock::iterator CurrentBottom; - IntervalPressure BotPressure; - RegPressureTracker BotRPTracker; - -#ifndef NDEBUG - /// The number of instructions scheduled so far. Used to cut off the - /// scheduler at the point determined by misched-cutoff. - unsigned NumInstrsScheduled; -#endif -public: - ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): - ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), - AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), - RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), - CurrentBottom(), BotRPTracker(BotPressure) { -#ifndef NDEBUG - NumInstrsScheduled = 0; -#endif - } - - ~ScheduleDAGMI() { - delete SchedImpl; - } - - MachineBasicBlock::iterator top() const { return CurrentTop; } - MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - - /// Implement the ScheduleDAGInstrs interface for handling the next scheduling - /// region. This covers all instructions in a block, while schedule() may only - /// cover a subset. - void enterRegion(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount); - - /// Implement ScheduleDAGInstrs interface for scheduling a sequence of - /// reorderable instructions. - void schedule(); - - /// Get current register pressure for the top scheduled instructions. - const IntervalPressure &getTopPressure() const { return TopPressure; } - const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } - - /// Get current register pressure for the bottom scheduled instructions. - const IntervalPressure &getBotPressure() const { return BotPressure; } - const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } - - /// Get register pressure for the entire scheduling region before scheduling. - const IntervalPressure &getRegPressure() const { return RegPressure; } - - const std::vector &getRegionCriticalPSets() const { - return RegionCriticalPSets; - } - - /// getIssueWidth - Return the max instructions per scheduling group. - unsigned getIssueWidth() const { - return (InstrItins && InstrItins->SchedModel) - ? InstrItins->SchedModel->IssueWidth : 1; - } - - /// getNumMicroOps - Return the number of issue slots required for this MI. - unsigned getNumMicroOps(MachineInstr *MI) const { - if (!InstrItins) return 1; - int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); - return (UOps >= 0) ? 
UOps : TII->getNumMicroOps(InstrItins, MI);
-  }
-
-protected:
-  void initRegPressure();
-  void updateScheduledPressure(std::vector NewMaxPressure);
-
-  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
-  bool checkSchedLimit();
-
-  void releaseRoots();
-
-  void releaseSucc(SUnit *SU, SDep *SuccEdge);
-  void releaseSuccessors(SUnit *SU);
-  void releasePred(SUnit *SU, SDep *PredEdge);
-  void releasePredecessors(SUnit *SU);
-
-  void placeDebugValues();
-};
-} // namespace
-
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 /// NumPredsLeft reaches zero, release the successor node.
 ///
@@ -498,7 +369,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
   BB->splice(InsertPos, BB, MI);

   // Update LiveIntervals
-  LIS->handleMove(MI);
+  LIS->handleMove(MI, /*UpdateFlags=*/true);

   // Recede RegionBegin if an instruction moves above the first.
   if (RegionBegin == InsertPos)
@@ -565,6 +436,9 @@ void ScheduleDAGMI::initRegPressure() {
   std::vector RegionPressure = RPTracker.getPressure().MaxSetPressure;
   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
     unsigned Limit = TRI->getRegPressureSetLimit(i);
+    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+          << "Limit " << Limit
+          << " Actual " << RegionPressure[i] << "\n");
     if (RegionPressure[i] > Limit)
       RegionCriticalPSets.push_back(PressureElement(i, 0));
   }
@@ -587,6 +461,74 @@ updateScheduledPressure(std::vector NewMaxPressure) {
   }
 }

+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easily extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
+void ScheduleDAGMI::schedule() {
+  buildDAGWithRegPressure();
+
+  postprocessDAG();
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  if (ViewMISchedDAGs) viewGraph();
+
+  initQueues();
+
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    assert(!SU->isScheduled && "Node already scheduled");
+    if (!checkSchedLimit())
+      break;
+
+    scheduleMI(SU, IsTopNode);
+
+    updateQueues(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+
+  DEBUG({
+      unsigned BBNum = top()->getParent()->getNumber();
+      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+}
+
+/// Build the DAG and setup three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
+  // Initialize the register pressure tracker used by buildSchedGraph.
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    RPTracker.recede();
+
+  // Build the DAG, and compute current register pressure.
+  buildSchedGraph(AA, &RPTracker);
+  if (ViewMISchedDAGs) viewGraph();
+
+  // Initialize top/bottom trackers after computing region pressure.
+  initRegPressure();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
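As the new comment above says, schedule() is now a skeletal driver and the policy lives in MachineSchedStrategy. A minimal sketch of that division of labor with an invented, much-reduced interface:

struct Node { bool Scheduled; };

struct Strategy {
  // Return the next node to schedule, or nullptr when done; set TopDown
  // to choose which end of the unscheduled zone it lands on.
  virtual Node *pickNode(bool &TopDown) = 0;
  virtual void schedNode(Node *N, bool TopDown) = 0;
  virtual ~Strategy() {}
};

void runSchedule(Strategy &S) {
  bool TopDown = false;
  while (Node *N = S.pickNode(TopDown)) {
    // Place the node, then let the strategy update its own state.
    N->Scheduled = true;
    S.schedNode(N, TopDown);
  }
}

Keeping placement mechanics in the driver and ordering decisions behind the interface is what lets new heuristics reuse the DAG, pressure tracking, and debug-value handling unchanged.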
+void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + // Release all DAG roots for scheduling. void ScheduleDAGMI::releaseRoots() { SmallVector BotRoots; @@ -607,28 +549,10 @@ void ScheduleDAGMI::releaseRoots() { SchedImpl->releaseBottomNode(*I); } -/// schedule - Called back from MachineScheduler::runOnMachineFunction -/// after setting up the current scheduling region. [RegionBegin, RegionEnd) -/// only includes instructions that have DAG nodes, not scheduling boundaries. -void ScheduleDAGMI::schedule() { - // Initialize the register pressure tracker used by buildSchedGraph. - RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); - - // Account for liveness generate by the region boundary. - if (LiveRegionEnd != RegionEnd) - RPTracker.recede(); - - // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker); - - // Initialize top/bottom trackers after computing region pressure. - initRegPressure(); - - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); - - if (ViewMISchedDAGs) viewGraph(); +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues() { + // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); // Release edges from the special Entry node or to the special Exit node. @@ -638,61 +562,64 @@ void ScheduleDAGMI::schedule() { // Release all DAG roots for scheduling. releaseRoots(); + SchedImpl->registerRoots(); + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); CurrentBottom = RegionEnd; - bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { - if (!checkSchedLimit()) - break; - - // Move the instruction to its new location in the instruction stream. - MachineInstr *MI = SU->getInstr(); - - if (IsTopNode) { - assert(SU->isTopReady() && "node still has unscheduled dependencies"); - if (&*CurrentTop == MI) - CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); - else { - moveInstruction(MI, CurrentTop); - TopRPTracker.setPos(MI); - } +} - // Update top scheduled pressure. - TopRPTracker.advance(); - assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); +/// Move an instruction and update register pressure. +void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { + // Move the instruction to its new location in the instruction stream. + MachineInstr *MI = SU->getInstr(); - // Release dependent instructions for scheduling. - releaseSuccessors(SU); + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else { + moveInstruction(MI, CurrentTop); + TopRPTracker.setPos(MI); } + + // Update top scheduled pressure. 
+ TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; else { - assert(SU->isBottomReady() && "node still has unscheduled dependencies"); - MachineBasicBlock::iterator priorII = - priorNonDebug(CurrentBottom, CurrentTop); - if (&*priorII == MI) - CurrentBottom = priorII; - else { - if (&*CurrentTop == MI) { - CurrentTop = nextIfDebug(++CurrentTop, priorII); - TopRPTracker.setPos(CurrentTop); - } - moveInstruction(MI, CurrentBottom); - CurrentBottom = MI; + if (&*CurrentTop == MI) { + CurrentTop = nextIfDebug(++CurrentTop, priorII); + TopRPTracker.setPos(CurrentTop); } - // Update bottom scheduled pressure. - BotRPTracker.recede(); - assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); - - // Release dependent instructions for scheduling. - releasePredecessors(SU); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; } - SU->isScheduled = true; - SchedImpl->schedNode(SU, IsTopNode); + // Update bottom scheduled pressure. + BotRPTracker.recede(); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); } - assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); +} - placeDebugValues(); +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); } /// Reinsert any remaining debug_values, just like the PostRA scheduler. @@ -716,91 +643,146 @@ void ScheduleDAGMI::placeDebugValues() { FirstDbgValue = NULL; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + //===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// namespace { -/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience -/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified -/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. -class ReadyQueue { - unsigned ID; - std::string Name; - std::vector Queue; - +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { public: - ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} - - unsigned getID() const { return ID; } - - StringRef getName() const { return Name; } - - // SU is in this queue if it's NodeQueueID is a superset of this ID. 
- bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } - - bool empty() const { return Queue.empty(); } - - unsigned size() const { return Queue.size(); } - - typedef std::vector::iterator iterator; + /// Represent the type of SchedCandidate found within a single queue. + /// pickNodeBidirectional depends on these listed by decreasing priority. + enum CandReason { + NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand, + BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, + SingleMax, MultiPressure, NextDefUse, NodeOrder}; - iterator begin() { return Queue.begin(); } +#ifndef NDEBUG + static const char *getReasonStr(ConvergingScheduler::CandReason Reason); +#endif - iterator end() { return Queue.end(); } + /// Policy for scheduling the next instruction in the candidate's zone. + struct CandPolicy { + bool ReduceLatency; + unsigned ReduceResIdx; + unsigned DemandResIdx; - iterator find(SUnit *SU) { - return std::find(Queue.begin(), Queue.end(), SU); - } + CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} + }; - void push(SUnit *SU) { - Queue.push_back(SU); - SU->NodeQueueId |= ID; - } + /// Status of an instruction's critical resource consumption. + struct SchedResourceDelta { + // Count critical resources in the scheduled region required by SU. + unsigned CritResources; - void remove(iterator I) { - (*I)->NodeQueueId &= ~ID; - *I = Queue.back(); - Queue.pop_back(); - } + // Count critical resources from another region consumed by SU. + unsigned DemandedResources; - void dump() { - dbgs() << Name << ": "; - for (unsigned i = 0, e = Queue.size(); i < e; ++i) - dbgs() << Queue[i]->NodeNum << " "; - dbgs() << "\n"; - } -}; + SchedResourceDelta(): CritResources(0), DemandedResources(0) {} -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { + bool operator==(const SchedResourceDelta &RHS) const { + return CritResources == RHS.CritResources + && DemandedResources == RHS.DemandedResources; + } + bool operator!=(const SchedResourceDelta &RHS) const { + return !operator==(RHS); + } + }; /// Store the state used by ConvergingScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { + CandPolicy Policy; + // The best SUnit candidate. SUnit *SU; + // The reason for this candidate. + CandReason Reason; + // Register pressure values for the best candidate. RegPressureDelta RPDelta; - SchedCandidate(): SU(NULL) {} + // Critical resource consumption of the best candidate. + SchedResourceDelta ResDelta; + + SchedCandidate(const CandPolicy &policy) + : Policy(policy), SU(NULL), Reason(NoCand) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. + void setBest(SchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + RPDelta = Best.RPDelta; + ResDelta = Best.ResDelta; + } + + void initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel); + }; + + /// Summarize the unscheduled region. + struct SchedRemainder { + // Critical path through the DAG in expected latency. + unsigned CriticalPath; + + // Unscheduled resources + SmallVector RemainingCounts; + // Critical resource for the unscheduled zone. + unsigned CritResIdx; + // Number of micro-ops left to schedule. 
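+    // (Micro-ops here are the decoded operations reported by
+    // TargetSchedModel::getNumMicroOps() for each instruction; init() below
+    // sums them over the DAG.)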
+ unsigned RemainingMicroOps; + // Is the unscheduled zone resource limited. + bool IsResourceLimited; + + unsigned MaxRemainingCount; + + void reset() { + CriticalPath = 0; + RemainingCounts.clear(); + CritResIdx = 0; + RemainingMicroOps = 0; + IsResourceLimited = false; + MaxRemainingCount = 0; + } + + SchedRemainder() { reset(); } + + void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; - /// Represent the type of SchedCandidate found within a single queue. - enum CandResult { - NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure }; /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in whichever direction at has moved, and maintains the state + /// current cycle in the direction of movement, and maintains the state /// of "hazards" and other interlocks at the current cycle. struct SchedBoundary { ScheduleDAGMI *DAG; + const TargetSchedModel *SchedModel; + SchedRemainder *Rem; ReadyQueue Available; ReadyQueue Pending; bool CheckPending; + // For heuristics, keep a list of the nodes that immediately depend on the + // most recently scheduled node. + SmallPtrSet NextSUs; + ScheduleHazardRecognizer *HazardRec; unsigned CurrCycle; @@ -809,29 +791,88 @@ class ConvergingScheduler : public MachineSchedStrategy { /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; + // The expected latency of the critical path in this scheduled zone. + unsigned ExpectedLatency; + + // Resources used in the scheduled zone beyond this boundary. + SmallVector ResourceCounts; + + // Cache the critical resources ID in this scheduled zone. + unsigned CritResIdx; + + // Is the scheduled region resource limited vs. latency limited. + bool IsResourceLimited; + + unsigned ExpectedCount; + + // Policy flag: attempt to find ILP until expected latency is covered. + bool ShouldIncreaseILP; + +#ifndef NDEBUG // Remember the greatest min operand latency. unsigned MaxMinLatency; +#endif + + void reset() { + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + HazardRec = 0; + CurrCycle = 0; + IssueCount = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + ResourceCounts.resize(1); + assert(!ResourceCounts[0] && "nonzero count for bad resource"); + CritResIdx = 0; + IsResourceLimited = false; + ExpectedCount = 0; + ShouldIncreaseILP = false; +#ifndef NDEBUG + MaxMinLatency = 0; +#endif + // Reserve a zero-count for invalid CritResIdx. + ResourceCounts.resize(1); + } /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. 
SchedBoundary(unsigned ID, const Twine &Name): - DAG(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), - CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), - MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { + reset(); + } ~SchedBoundary() { delete HazardRec; } + void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, + SchedRemainder *rem); + bool isTop() const { return Available.getID() == ConvergingScheduler::TopQID; } + unsigned getUnscheduledLatency(SUnit *SU) const { + if (isTop()) + return SU->getHeight(); + return SU->getDepth(); + } + + unsigned getCriticalCount() const { + return ResourceCounts[CritResIdx]; + } + bool checkHazard(SUnit *SU); + void checkILPPolicy(); + void releaseNode(SUnit *SU, unsigned ReadyCycle); void bumpCycle(); + void countResource(unsigned PIdx, unsigned Cycles); + void bumpNode(SUnit *SU); void releasePending(); @@ -841,10 +882,13 @@ class ConvergingScheduler : public MachineSchedStrategy { SUnit *pickOnlyChoice(); }; +private: ScheduleDAGMI *DAG; + const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; // State of the top and bottom scheduled instruction boundaries. + SchedRemainder Rem; SchedBoundary Top; SchedBoundary Bot; @@ -857,7 +901,7 @@ public: }; ConvergingScheduler(): - DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} virtual void initialize(ScheduleDAGMI *dag); @@ -869,28 +913,80 @@ public: virtual void releaseBottomNode(SUnit *SU); + virtual void registerRoots(); + protected: - SUnit *pickNodeBidrectional(bool &IsTopNode); + void balanceZones( + ConvergingScheduler::SchedBoundary &CriticalZone, + ConvergingScheduler::SchedCandidate &CriticalCand, + ConvergingScheduler::SchedBoundary &OppositeZone, + ConvergingScheduler::SchedCandidate &OppositeCand); + + void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, + ConvergingScheduler::SchedCandidate &BotCand); + + void tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker); + + SUnit *pickNodeBidirectional(bool &IsTopNode); + + void pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); - CandResult pickNodeFromQueue(ReadyQueue &Q, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); #ifndef NDEBUG - void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, - PressureElement P = PressureElement()); + void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); #endif }; } // namespace +void ConvergingScheduler::SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { + reset(); + if (!SchedModel->hasInstrSchedModel()) + return; + RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); + for (std::vector::iterator + I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); + RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + RemainingCounts[PIdx] += (Factor * PI->Cycles); 
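+      // (Worked example with hypothetical numbers: a resource with factor 2
+      // that a write occupies for 3 cycles contributes 2 * 3 = 6 normalized
+      // units, making counts comparable across resources of different width.)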
+ } + } +} + +void ConvergingScheduler::SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { + reset(); + DAG = dag; + SchedModel = smodel; + Rem = rem; + if (SchedModel->hasInstrSchedModel()) + ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); +} + void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; + SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; - Top.DAG = dag; - Bot.DAG = dag; + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. - // Initialize the HazardRecognizers. + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); - const InstrItineraryData *Itin = TM.getInstrItineraryData(); Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); @@ -905,13 +1001,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -925,17 +1020,27 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; } Bot.releaseNode(SU, SU->BotReadyCycle); } +void ConvergingScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them in case. + for (std::vector::const_iterator + I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); +} + /// Does this SU have a hazard within the current instruction group. /// /// The scheduler supports two modes of hazard recognition. 
The first is the @@ -953,14 +1058,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; - if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" + << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; - + } return false; } +/// If expected latency is covered, disable ILP policy. +void ConvergingScheduler::SchedBoundary::checkILPPolicy() { + if (ShouldIncreaseILP + && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { + ShouldIncreaseILP = false; + DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); + } +} + void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -970,15 +1088,31 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, Pending.push(SU); else Available.push(SU); + + // Record this node as an immediate dependent of the scheduled node. + NextSUs.insert(SU); + + // If CriticalPath has been computed, then check if the unscheduled nodes + // exceed the ILP window. Before registerRoots, CriticalPath==0. + if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) + > Rem->CriticalPath + ILPWindow)) { + ShouldIncreaseILP = true; + DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " + << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); + } } /// Move the boundary of scheduled code by one cycle. void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = DAG->getIssueWidth(); + unsigned Width = SchedModel->getIssueWidth(); IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + unsigned NextCycle = CurrCycle + 1; assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + if (MinReadyCycle > NextCycle) { + IssueCount = 0; + NextCycle = MinReadyCycle; + } if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. @@ -994,11 +1128,39 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { } } CheckPending = true; + IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + DEBUG(dbgs() << " *** " << Available.getName() << " cycle " << CurrCycle << '\n'); } +/// Add the given processor resource to this scheduled zone. +void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, + unsigned Cycles) { + unsigned Factor = SchedModel->getResourceFactor(PIdx); + DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name + << " +(" << Cycles << "x" << Factor + << ") / " << SchedModel->getLatencyFactor() << '\n'); + + unsigned Count = Factor * Cycles; + ResourceCounts[PIdx] += Count; + assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); + Rem->RemainingCounts[PIdx] -= Count; + + // Reset MaxRemainingCount for sanity. + Rem->MaxRemainingCount = 0; + + // Check if this resource exceeds the current critical resource by a full + // cycle. If so, it becomes the critical resource. 
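+  // (Both counts are in normalized units, so "a full cycle" means a
+  // difference of at least getLatencyFactor(); e.g., with a latency factor
+  // of 2, counts of 8 vs. 6 are enough to promote PIdx to critical.)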
+ if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + DEBUG(dbgs() << " *** Critical resource " + << SchedModel->getProcResource(PIdx)->Name << " x" + << ResourceCounts[PIdx] << '\n'); + } +} + /// Move the boundary of scheduled code by one SUnit. void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. @@ -1010,11 +1172,38 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + // Update resource counts and critical resource. + if (SchedModel->hasInstrSchedModel()) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + countResource(PI->ProcResourceIdx, PI->Cycles); + } + } + if (isTop()) { + if (SU->getDepth() > ExpectedLatency) + ExpectedLatency = SU->getDepth(); + } + else { + if (SU->getHeight() > ExpectedLatency) + ExpectedLatency = SU->getHeight(); + } + + IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); + // Check the instruction group dispatch limit. // TODO: Check if this SU must end a dispatch group. - IssueCount += DAG->getNumMicroOps(SU->getInstr()); - if (IssueCount >= DAG->getIssueWidth()) { - DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + + // checkHazard prevents scheduling multiple instructions per cycle that exceed + // issue width. However, we commonly reach the maximum. In this case + // opportunistically bump the cycle to avoid uselessly checking everything in + // the readyQ. Furthermore, a single instruction may produce more than one + // cycle's worth of micro-ops. + if (IssueCount >= SchedModel->getIssueWidth()) { + DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); bumpCycle(); } } @@ -1045,6 +1234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { Pending.remove(Pending.begin()+i); --i; --e; } + DEBUG(if (!Pending.empty()) Pending.dump()); CheckPending = false; } @@ -1059,12 +1249,23 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { } /// If this queue only has one ready candidate, return it. As a side effect, -/// advance the cycle until at least one node is ready. If multiple instructions -/// are ready, return NULL. +/// defer any nodes that now hit a hazard, and advance the cycle until at least +/// one node is ready. If multiple instructions are ready, return NULL. SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); + if (IssueCount > 0) { + // Defer any ready instrs that now have a hazard. 
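+    // (An instruction may have entered Available earlier in this cycle and
+    // only become a hazard once IssueCount grew; pushing it back to Pending
+    // keeps Available hazard-free for the pick below.)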
+    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+      if (checkHazard(*I)) {
+        Pending.push(*I);
+        I = Available.remove(I);
+        continue;
+      }
+      ++I;
+    }
+  }
   for (unsigned i = 0; Available.empty(); ++i) {
     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
            "permanent hazard"); (void)i;
@@ -1076,18 +1277,262 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   return NULL;
 }
 
-#ifndef NDEBUG
-void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
-                                         SUnit *SU, PressureElement P) {
-  dbgs() << Label << " " << Q.getName() << " ";
-  if (P.isValid())
-    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
-           << " ";
-  else
-    dbgs() << " ";
-  SU->dump(DAG);
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, when releasing each candidate, releaseNode
+/// compares the region's critical path to the candidate's height or depth and
+/// the scheduled zone's expected latency, then sets ShouldIncreaseILP.
+void ConvergingScheduler::balanceZones(
+  ConvergingScheduler::SchedBoundary &CriticalZone,
+  ConvergingScheduler::SchedCandidate &CriticalCand,
+  ConvergingScheduler::SchedBoundary &OppositeZone,
+  ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+  if (!CriticalZone.IsResourceLimited)
+    return;
+
+  SchedRemainder *Rem = CriticalZone.Rem;
+
+  // If the critical zone is overconsuming a resource relative to the
+  // remainder, try to reduce it.
+  unsigned RemainingCritCount =
+    Rem->RemainingCounts[CriticalZone.CritResIdx];
+  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
+      > (int)SchedModel->getLatencyFactor()) {
+    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+          << '\n');
+  }
+  // If the other zone is underconsuming a resource relative to the full zone,
+  // try to increase it.
+  unsigned OppositeCount =
+    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+      > (int)SchedModel->getLatencyFactor()) {
+    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
+          << '\n');
+  }
+}
+
+/// Determine if the scheduled zones exceed resource limits or the critical
+/// path and set each candidate's ReduceHeight policy accordingly.
+void ConvergingScheduler::checkResourceLimits(
+  ConvergingScheduler::SchedCandidate &TopCand,
+  ConvergingScheduler::SchedCandidate &BotCand) {
+
+  Bot.checkILPPolicy();
+  Top.checkILPPolicy();
+  if (Bot.ShouldIncreaseILP)
+    BotCand.Policy.ReduceLatency = true;
+  if (Top.ShouldIncreaseILP)
+    TopCand.Policy.ReduceLatency = true;
+
+  // Handle resource-limited regions.
+  if (Top.IsResourceLimited && Bot.IsResourceLimited
+      && Top.CritResIdx == Bot.CritResIdx) {
+    // If the scheduled critical resource in both zones is no longer the
+    // critical remaining resource, attempt to reduce resource height both ways.
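+    // (Rem.CritResIdx is the critical resource of the unscheduled remainder,
+    // while Top.CritResIdx/Bot.CritResIdx describe each scheduled zone; the
+    // test below fires only when the zones agree but the remainder differs.)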
+    if (Top.CritResIdx != Rem.CritResIdx) {
+      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+      DEBUG(dbgs() << "Reduce scheduled "
+            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+    }
+    return;
+  }
+  // Handle latency-limited regions.
+  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+    // If the total scheduled expected latency exceeds the region's critical
+    // path then reduce latency both ways.
+    //
+    // Just because a zone is not resource limited does not mean it is latency
+    // limited. Unbuffered resources, such as max micro-ops, may cause
+    // CurrCycle to exceed expected latency.
+    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+      TopCand.Policy.ReduceLatency = true;
+      BotCand.Policy.ReduceLatency = true;
+      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+            << " + " << Bot.ExpectedLatency << '\n');
+    }
+    return;
+  }
+  // The critical resource is different in each zone, so request balancing.
+
+  // Compute the cost of each zone.
+  Rem.MaxRemainingCount = std::max(
+    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
+    Rem.RemainingCounts[Rem.CritResIdx]);
+  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+  Top.ExpectedCount = std::max(
+    Top.getCriticalCount(),
+    Top.ExpectedCount * SchedModel->getLatencyFactor());
+  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+  Bot.ExpectedCount = std::max(
+    Bot.getCriticalCount(),
+    Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+  balanceZones(Top, TopCand, Bot, BotCand);
+  balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+                  const TargetSchedModel *SchedModel) {
+  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+    return;
+
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel->getWriteProcResBegin(SC),
+         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+      ResDelta.CritResources += PI->Cycles;
+    if (PI->ProcResourceIdx == Policy.DemandResIdx)
+      ResDelta.DemandedResources += PI->Cycles;
+  }
+}
+
+/// Return true if this heuristic determines order.
+static bool tryLess(unsigned TryVal, unsigned CandVal,
+                    ConvergingScheduler::SchedCandidate &TryCand,
+                    ConvergingScheduler::SchedCandidate &Cand,
+                    ConvergingScheduler::CandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  return false;
+}
+static bool tryGreater(unsigned TryVal, unsigned CandVal,
+                       ConvergingScheduler::SchedCandidate &TryCand,
+                       ConvergingScheduler::SchedCandidate &Cand,
+                       ConvergingScheduler::CandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  return false;
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +/// \param Zone describes the scheduled zone that we are extending. +/// \param RPTracker describes reg pressure within the scheduled zone. +/// \param TempTracker is a scratch pressure tracker to reuse in queries. +void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker) { + + // Always initialize TryCand's RPDelta. + TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + // Avoid exceeding the target's limit. + if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, + Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + return; + if (Cand.Reason == SingleExcess) + Cand.Reason = MultiPressure; + + // Avoid increasing the max critical pressure in the scheduled region. + if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, + Cand.RPDelta.CriticalMax.UnitIncrease, + TryCand, Cand, SingleCritical)) + return; + if (Cand.Reason == SingleCritical) + Cand.Reason = MultiPressure; + + // Avoid critical resource consumption and balance the schedule. + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() + > Zone.ExpectedCount) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, TopDepthReduce)) + return; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, TopPathReduce)) + return; + } + else { + if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() + > Zone.ExpectedCount) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, BotHeightReduce)) + return; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, BotPathReduce)) + return; + } + } + + // Avoid increasing the max pressure of the entire region. + if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, + Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) + return; + if (Cand.Reason == SingleMax) + Cand.Reason = MultiPressure; + + // Prefer immediate defs/users of the last scheduled instruction. This is a + // nice pressure avoidance strategy that also conserves the processor's + // register renaming resources and keeps the machine code readable. + if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) { + TryCand.Reason = NextDefUse; + return; + } + if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) { + if (Cand.Reason > NextDefUse) + Cand.Reason = NextDefUse; + return; + } + // Fall through to original instruction order. + if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) + || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { + TryCand.Reason = NodeOrder; + } } -#endif /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is /// more desirable than RHS from scheduling standpoint. 
@@ -1098,109 +1543,144 @@ static bool compareRPDelta(const RegPressureDelta &LHS, // have UnitIncrease==0, so are neutral. // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) + if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { + DEBUG(dbgs() << "RP excess top - bot: " + << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - + } // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) + if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { + DEBUG(dbgs() << "RP critical top - bot: " + << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) + << '\n'); return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - + } // Avoid increasing the max pressure of the entire region. - if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) + if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { + DEBUG(dbgs() << "RP current top - bot: " + << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) + << '\n'); return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - + } return false; } +#ifndef NDEBUG +const char *ConvergingScheduler::getReasonStr( + ConvergingScheduler::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case SingleExcess: return "REG-EXCESS"; + case SingleCritical: return "REG-CRIT "; + case SingleMax: return "REG-MAX "; + case MultiPressure: return "REG-MULTI "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, + const SchedBoundary &Zone) { + const char *Label = getReasonStr(Cand.Reason); + PressureElement P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case SingleExcess: + P = Cand.RPDelta.Excess; + break; + case SingleCritical: + P = Cand.RPDelta.CriticalMax; + break; + case SingleMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << Label << " " << Zone.Available.getName() << " "; + if (P.isValid()) + dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease + << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << Latency << " cycles "; + else + dbgs() << " "; + Cand.SU->dump(DAG); +} +#endif + /// Pick the best candidate from the top queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. 
-ConvergingScheduler::CandResult ConvergingScheduler:: -pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, - SchedCandidate &Candidate) { +void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { + ReadyQueue &Q = Zone.Available; + DEBUG(Q.dump()); // getMaxPressureDelta temporarily modifies the tracker. RegPressureTracker &TempTracker = const_cast(RPTracker); - // BestSU remains NULL if no top candidates beat the best existing candidate. - CandResult FoundCandidate = NoCand; for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { - RegPressureDelta RPDelta; - TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); - - // Initialize the candidate if needed. - if (!Candidate.SU) { - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = NodeOrder; - continue; - } - // Avoid exceeding the target's limit. - if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) { - DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = SingleExcess; - continue; - } - if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease) - continue; - if (FoundCandidate == SingleExcess) - FoundCandidate = MultiPressure; - - // Avoid increasing the max critical pressure in the scheduled region. - if (RPDelta.CriticalMax.UnitIncrease - < Candidate.RPDelta.CriticalMax.UnitIncrease) { - DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = SingleCritical; - continue; - } - if (RPDelta.CriticalMax.UnitIncrease - > Candidate.RPDelta.CriticalMax.UnitIncrease) - continue; - if (FoundCandidate == SingleCritical) - FoundCandidate = MultiPressure; - - // Avoid increasing the max pressure of the entire region. - if (RPDelta.CurrentMax.UnitIncrease - < Candidate.RPDelta.CurrentMax.UnitIncrease) { - DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = SingleMax; - continue; - } - if (RPDelta.CurrentMax.UnitIncrease - > Candidate.RPDelta.CurrentMax.UnitIncrease) - continue; - if (FoundCandidate == SingleMax) - FoundCandidate = MultiPressure; - - // Fall through to original instruction order. - // Only consider node order if Candidate was chosen from this Q. - if (FoundCandidate == NoCand) - continue; - if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) - || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { - DEBUG(traceCandidate("NCAND", Q, *I)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = NodeOrder; + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(DAG, SchedModel); + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand, Zone)); } + TryCand.SU = *I; } - return FoundCandidate; +} + +static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? 
"top" : "bot") + << " SU(" << Cand.SU->NodeNum << ") " + << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); } /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { +SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { @@ -1211,11 +1691,14 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { IsTopNode = true; return SU; } - SchedCandidate BotCand; + CandPolicy NoPolicy; + SchedCandidate BotCand(NoPolicy); + SchedCandidate TopCand(NoPolicy); + checkResourceLimits(TopCand, BotCand); + // Prefer bottom scheduling when heuristics are silent. - CandResult BotResult = pickNodeFromQueue(Bot.Available, - DAG->getBotRPTracker(), BotCand); - assert(BotResult != NoCand && "failed to find the first candidate"); + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); // If either Q has a single candidate that provides the least increase in // Excess pressure, we can immediately schedule from that Q. @@ -1224,37 +1707,41 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. - if (BotResult == SingleExcess || BotResult == SingleCritical) { + if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) { IsTopNode = false; + tracePick(BotCand, IsTopNode); return BotCand.SU; } // Check if the top Q has a better candidate. - SchedCandidate TopCand; - CandResult TopResult = pickNodeFromQueue(Top.Available, - DAG->getTopRPTracker(), TopCand); - assert(TopResult != NoCand && "failed to find the first candidate"); + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); - if (TopResult == SingleExcess || TopResult == SingleCritical) { - IsTopNode = true; - return TopCand.SU; - } // If either Q has a single candidate that minimizes pressure above the // original region's pressure pick it. - if (BotResult == SingleMax) { + if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { + if (TopCand.Reason < BotCand.Reason) { + IsTopNode = true; + tracePick(TopCand, IsTopNode); + return TopCand.SU; + } IsTopNode = false; + tracePick(BotCand, IsTopNode); return BotCand.SU; } - if (TopResult == SingleMax) { + // Check for a salient pressure difference and pick the best from either side. + if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { IsTopNode = true; + tracePick(TopCand, IsTopNode); return TopCand.SU; } - // Check for a salient pressure difference and pick the best from either side. - if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { + // Otherwise prefer the bottom candidate, in node order if all else failed. + if (TopCand.Reason < BotCand.Reason) { IsTopNode = true; + tracePick(TopCand, IsTopNode); return TopCand.SU; } - // Otherwise prefer the bottom candidate in node order. 
   IsTopNode = false;
+  tracePick(BotCand, IsTopNode);
   return BotCand.SU;
 }
 
@@ -1266,33 +1753,34 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
     return NULL;
   }
   SUnit *SU;
-  if (ForceTopDown) {
-    SU = Top.pickOnlyChoice();
-    if (!SU) {
-      SchedCandidate TopCand;
-      CandResult TopResult =
-        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
-      assert(TopResult != NoCand && "failed to find the first candidate");
-      (void)TopResult;
-      SU = TopCand.SU;
+  do {
+    if (ForceTopDown) {
+      SU = Top.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        SchedCandidate TopCand(NoPolicy);
+        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+        SU = TopCand.SU;
+      }
+      IsTopNode = true;
     }
-    IsTopNode = true;
-  }
-  else if (ForceBottomUp) {
-    SU = Bot.pickOnlyChoice();
-    if (!SU) {
-      SchedCandidate BotCand;
-      CandResult BotResult =
-        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
-      assert(BotResult != NoCand && "failed to find the first candidate");
-      (void)BotResult;
-      SU = BotCand.SU;
+    else if (ForceBottomUp) {
+      SU = Bot.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        SchedCandidate BotCand(NoPolicy);
+        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+        SU = BotCand.SU;
+      }
+      IsTopNode = false;
     }
-    IsTopNode = false;
-  }
-  else {
-    SU = pickNodeBidrectional(IsTopNode);
-  }
+    else {
+      SU = pickNodeBidirectional(IsTopNode);
+    }
+  } while (SU->isScheduled);
+
   if (SU->isTopReady())
     Top.removeReady(SU);
   if (SU->isBottomReady())
@@ -1331,6 +1819,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                         createConvergingSched);
 
 //===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+  ScheduleDAGILP *ILP;
+  bool MaximizeILP;
+
+  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+
+  /// \brief Apply a less-than relation on node priority.
+  bool operator()(const SUnit *A, const SUnit *B) const {
+    // Return true if A comes after B in the Q.
+    if (MaximizeILP)
+      return ILP->getILP(A) < ILP->getILP(B);
+    else
+      return ILP->getILP(A) > ILP->getILP(B);
+  }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+  ScheduleDAGILP ILP;
+  ILPOrder Cmp;
+
+  std::vector<SUnit*> ReadyQ;
+public:
+  ILPScheduler(bool MaximizeILP)
+    : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+
+  virtual void initialize(ScheduleDAGMI *DAG) {
+    ReadyQ.clear();
+    ILP.resize(DAG->SUnits.size());
+  }
+
+  virtual void registerRoots() {
+    for (std::vector<SUnit*>::const_iterator
+           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
+      ILP.computeILP(*I);
+    }
+  }
+
+  /// Implement MachineSchedStrategy interface.
+ /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + if (ReadyQ.empty()) return NULL; + pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + SUnit *SU = ReadyQ.back(); + ReadyQ.pop_back(); + IsTopNode = false; + DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() + << " ILP: " << ILP.getILP(SU) << '\n'); + return SU; + } + + virtual void schedNode(SUnit *, bool) {} + + virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + + virtual void releaseBottomNode(SUnit *SU) { + ReadyQ.push_back(SU); + std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(false)); +} +static MachineSchedRegistry ILPMaxRegistry( + "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( + "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===// // Machine Instruction Shuffler for Correctness Testing //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index bc383cb..b117f8c 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -49,7 +49,6 @@ namespace { MachineDominatorTree *DT; // Machine dominator tree MachineLoopInfo *LI; AliasAnalysis *AA; - BitVector AllocatableSet; // Which physregs are allocatable? // Remember which edges have been considered for breaking. SmallSet, 8> @@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis(); LI = &getAnalysis(); AA = &getAnalysis(); - AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 1a3aa60..9686b04 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -14,9 +14,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PostOrderIterator.h" @@ -50,9 +51,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - ItinData = MF->getTarget().getInstrItineraryData(); MRI = &MF->getRegInfo(); Loops = &getAnalysis(); + const TargetSubtargetInfo &ST = + MF->getTarget().getSubtarget(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); return false; } @@ -674,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. 
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; - if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + if (!DefTBI.isEarlierInSameTrace(TBI)) continue; unsigned Len = LIR.Height + Cycles[DefMI].Depth; MaxLen = std::max(MaxLen, Len); @@ -737,16 +740,15 @@ computeInstrDepths(const MachineBasicBlock *MBB) { const TraceBlockInfo&DepTBI = BlockInfo[Dep.DefMI->getParent()->getNumber()]; // Ignore dependencies from outside the current trace. - if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + if (!DepTBI.isEarlierInSameTrace(TBI)) continue; assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, - Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, - /* FindMin = */ false); + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, + /* FindMin = */ false); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -769,7 +771,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Height is the issue height computed from virtual register dependencies alone. static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, SparseSet &RegUnits, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { SmallVector ReadOps; @@ -792,14 +794,10 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, unsigned DepHeight = I->Cycle; if (!MI->isTransient()) { // We may not know the UseMI of this dependency, if it came from the - // live-in list. - if (I->MI) - DepHeight += TII->computeOperandLatency(ItinData, - MI, MO.getOperandNo(), - I->MI, I->Op); - else - // No UseMI. Just use the MI latency instead. - DepHeight += TII->getInstrLatency(ItinData, MI); + // live-in list. SchedModel can handle a NULL UseMI. + DepHeight += SchedModel + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, + /* FindMin = */ false); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -832,12 +830,12 @@ typedef DenseMap MIHeightMap; static bool pushDepHeight(const DataDep &Dep, const MachineInstr *UseMI, unsigned UseHeight, MIHeightMap &Heights, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII) { // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) - UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp); + UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, false); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -852,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep, return false; } -/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists -/// of all the blocks in Trace. Stop when reaching the block that contains -/// DefMI. +/// Assuming that the virtual register defined by DefMI:DefOp was used by +/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop +/// when reaching the block that contains DefMI. 
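+/// (DefOp selects which operand of DefMI defines the register; it must name
+/// a virtual register, as asserted below.)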
void MachineTraceMetrics::Ensemble:: -addLiveIns(const MachineInstr *DefMI, +addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); - unsigned Reg = DefMI->getOperand(0).getReg(); + unsigned Reg = DefMI->getOperand(DefOp).getReg(); assert(TargetRegisterInfo::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); @@ -951,8 +949,8 @@ computeInstrHeights(const MachineBasicBlock *MBB) { unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); if (pushDepHeight(Deps.front(), PHI, Height, - Heights, MTM.ItinData, MTM.TII)) - addLiveIns(Deps.front().DefMI, Stack); + Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); } } } @@ -980,12 +978,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // There may also be regunit dependencies to include in the height. if (HasPhysRegs) Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, - MTM.ItinData, MTM.TII, MTM.TRI); + MTM.SchedModel, MTM.TII, MTM.TRI); // Update the required height of any virtual registers read by MI. for (unsigned i = 0, e = Deps.size(); i != e; ++i) - if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) - addLiveIns(Deps[i].DefMI, Stack); + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack); InstrCycles &MICycles = Cycles[MI]; MICycles.Height = Cycle; @@ -1054,10 +1052,8 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, - Dep.DefMI, Dep.DefOp, - PHI, Dep.UseOp, - /* FindMin = */ false); + DepCycle += TE.MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); return DepCycle; } @@ -1068,9 +1064,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { unsigned Instrs = TBI.InstrDepth; if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; - if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) - if (Model->IssueWidth != 0) - return Instrs / Model->IssueWidth; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; // Assume issue width 1 without a schedule model. return Instrs; } @@ -1080,9 +1075,8 @@ getResourceLength(ArrayRef Extrablocks) const { unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; - if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) - if (Model->IssueWidth != 0) - return Instrs / Model->IssueWidth; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; // Assume issue width 1 without a schedule model. 
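+  // (With hypothetical numbers: 12 instructions at issue width 4 give a
+  // resource length of 3 cycles; without a model the raw count is returned.)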
return Instrs; } diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h index c5b86f3..460730b 100644 --- a/lib/CodeGen/MachineTraceMetrics.h +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -50,6 +50,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetSchedule.h" namespace llvm { @@ -67,9 +68,9 @@ class MachineTraceMetrics : public MachineFunctionPass { const MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const InstrItineraryData *ItinData; const MachineRegisterInfo *MRI; const MachineLoopInfo *Loops; + TargetSchedModel SchedModel; public: class Ensemble; @@ -164,6 +165,14 @@ public: /// Invalidate height resources when a block below this one has changed. void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } + /// Determine if this block belongs to the same trace as TBI and comes + /// before it in the trace. + /// Also returns true when TBI == this. + bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const { + return hasValidDepth() && TBI.hasValidDepth() && + Head == TBI.Head && InstrDepth <= TBI.InstrDepth; + } + // Data-dependency-related information. Per-instruction depth and height // are computed from data dependencies in the current trace, using // itinerary data. @@ -270,7 +279,7 @@ public: unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); void computeInstrDepths(const MachineBasicBlock*); void computeInstrHeights(const MachineBasicBlock*); - void addLiveIns(const MachineInstr *DefMI, + void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef Trace); protected: diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index f745b41..69a3ae8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,8 +23,9 @@ // the verifier errors. //===----------------------------------------------------------------------===// +#include "llvm/BasicBlock.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -73,11 +74,12 @@ namespace { typedef SmallVector RegMaskVector; typedef DenseSet RegSet; typedef DenseMap RegMap; + typedef SmallPtrSet BlockSet; const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; BitVector regsReserved; - BitVector regsAllocatable; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; RegMaskVector regMasks; @@ -117,6 +119,9 @@ namespace { // block. This set is disjoint from regsLiveOut. RegSet vregsRequired; + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. 
Return true if @@ -180,7 +185,7 @@ namespace { } bool isAllocatable(unsigned Reg) { - return Reg < regsAllocatable.size() && regsAllocatable.test(Reg); + return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); } // Analysis information if available @@ -208,6 +213,8 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void verifyInlineAsm(const MachineInstr *MI); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); @@ -352,7 +359,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -360,7 +367,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { report(msg, MBB->getParent()); *OS << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() - << " (" << (void*)MBB << ')'; + << " (" << (const void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -419,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); - regsReserved = TRI->getReservedRegs(*MF); + regsReserved = MRI->getReservedRegs(); // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; @@ -431,9 +438,23 @@ void MachineVerifier::visitMachineFunctionBefore() { } } - regsAllocatable = TRI->getAllocatableSet(*MF); - markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. + FunctionBlocks.clear(); + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + FunctionBlocks.insert(I); + BBInfo &MInfo = MBBInfoMap[I]; + + MInfo.Preds.insert(I->pred_begin(), I->pred_end()); + if (MInfo.Preds.size() != I->pred_size()) + report("MBB has duplicate entries in its predecessor list.", I); + + MInfo.Succs.insert(I->succ_begin(), I->succ_end()); + if (MInfo.Succs.size() != I->succ_size()) + report("MBB has duplicate entries in its successor list.", I); + } } // Does iterator point to a and b as the first two elements? @@ -470,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); + if (!FunctionBlocks.count(*I)) + report("MBB has successor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Preds.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the predecessor list of the successor BB#" + << (*I)->getNumber() << ".\n"; + } + } + + // Check the predecessor list. 
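+  // (Mirror of the successor check above: every predecessor must belong to
+  // this function and must name MBB in its own successor list.)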
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (!FunctionBlocks.count(*I)) + report("MBB has predecessor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Succs.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the successor list of the predecessor BB#" + << (*I)->getNumber() << ".\n"; + } } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); @@ -540,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); - } if (MBB->succ_size() != 2) { + } if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (&*MBBI != TBB) + report("MBB exits via conditional branch/fall-through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/fall-through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { @@ -560,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (TBB && FBB) { // Block conditionally branches somewhere, otherwise branches // somewhere else. - if (MBB->succ_size() != 2) { + if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (FBB != TBB) + report("MBB exits via conditional branch/branch through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/branch through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { @@ -639,6 +695,50 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { } } +// The operands on an INLINEASM instruction must follow a template. +// Verify that the flag operands make sense. +void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { + // The first two operands on INLINEASM are the asm string and global flags. + if (MI->getNumOperands() < 2) { + report("Too few operands on inline asm", MI); + return; + } + if (!MI->getOperand(0).isSymbol()) + report("Asm string must be an external symbol", MI); + if (!MI->getOperand(1).isImm()) + report("Asm flags must be an immediate", MI); + // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, + // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16. + if (!isUInt<5>(MI->getOperand(1).getImm())) + report("Unknown asm flags", &MI->getOperand(1), 1); + + assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + unsigned NumOps; + for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) { + const MachineOperand &MO = MI->getOperand(OpNo); + // There may be implicit ops after the fixed operands. + if (!MO.isImm()) + break; + NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm()); + } + + if (OpNo > MI->getNumOperands()) + report("Missing operands in last group", MI); + + // An optional MDNode follows the groups. 
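For reference, a minimal sketch of the flag-word arithmetic behind the group walk above, assuming the packed encoding used by InlineAsm::getNumOperandRegisters(): the low 3 bits carry the operand kind and the next 13 bits the group's register count. The loop above advances by 1 + that count per group; the optional metadata operand is then skipped just below.

    #include <cassert>

    // Pack an operand-group flag word (hypothetical helper, standard C++).
    static unsigned makeGroupFlag(unsigned Kind, unsigned NumRegs) {
      assert(Kind < 8 && NumRegs < (1u << 13) && "field overflow");
      return Kind | (NumRegs << 3);
    }

    // Mirror of InlineAsm::getNumOperandRegisters(): extract the count.
    static unsigned numOperandRegisters(unsigned Flag) {
      return (Flag & 0xffff) >> 3;
    }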
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata()) + ++OpNo; + + // All trailing operands must be implicit registers. + for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (!MO.isReg() || !MO.isImplicit()) + report("Expected implicit register after groups", &MO, OpNo); + } +} + void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { @@ -647,6 +747,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumExplicitOperands() << " given.\n"; } + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { @@ -702,6 +806,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } + + int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); + if (TiedTo != -1) { + if (!MO->isReg()) + report("Tied use must be a register", MO, MONum); + else if (!MO->isTied()) + report("Operand should be tied", MO, MONum); + else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) + report("Tied def doesn't match MCInstrDesc", MO, MONum); + } else if (MO->isReg() && MO->isTied()) + report("Explicit operand should not be tied", MO, MONum); } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) @@ -716,6 +831,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify the consistency of tied operands. + if (MO->isTied()) { + unsigned OtherIdx = MI->findTiedOperandIdx(MONum); + const MachineOperand &OtherMO = MI->getOperand(OtherIdx); + if (!OtherMO.isReg()) + report("Must be tied to a register", MO, MONum); + if (!OtherMO.isTied()) + report("Missing tie flags on tied operand", MO, MONum); + if (MI->findTiedOperandIdx(OtherIdx) != MONum) + report("Inconsistent tie links", MO, MONum); + if (MONum < MCID.getNumDefs()) { + if (OtherIdx < MCID.getNumOperands()) { + if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO)) + report("Explicit def tied to explicit use without tie constraint", + MO, MONum); + } else { + if (!OtherMO.isImplicit()) + report("Explicit def should be tied to implicit use", MO, MONum); + } + } + } + // Verify two-address constraints after leaving SSA form. 
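The tied-operand checks above boil down to one invariant: tie links must be mutual. A standalone model, with Ties[i] holding the operand index that operand i is tied to (or -1 when untied), standing in for findTiedOperandIdx(); the two-address verification continues below.

    #include <vector>

    static bool tieLinksConsistent(const std::vector<int> &Ties) {
      for (size_t i = 0; i < Ties.size(); ++i) {
        int j = Ties[i];
        if (j < 0)
          continue;                        // operand i is not tied
        if (size_t(j) >= Ties.size() || Ties[j] != int(i))
          return false;                    // dangling or one-sided link
      }
      return true;
    }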
unsigned DefIdx; if (!MRI->isSSA() && MO->isUse() && diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index cfa3eec..4ea21d4 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -49,8 +49,8 @@ static cl::opt DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt DisableMachineDCE("disable-machine-dce", cl::Hidden, cl::desc("Disable Machine Dead Code Elimination")); -static cl::opt EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden, - cl::desc("Enable Early If-conversion")); +static cl::opt DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden, + cl::desc("Disable Early If-conversion")); static cl::opt DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); static cl::opt DisableMachineCSE("disable-machine-cse", cl::Hidden, @@ -161,7 +161,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { return applyDisable(TargetID, DisableMachineDCE); if (StandardID == &EarlyIfConverterID) - return applyDisable(TargetID, !EnableEarlyIfConversion); + return applyDisable(TargetID, DisableEarlyIfConversion); if (StandardID == &MachineLICMID) return applyDisable(TargetID, DisableMachineLICM); @@ -447,8 +447,8 @@ void TargetPassConfig::addMachinePasses() { const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); assert (TPI && IPI && "Pass ID not registered!"); - const char *TID = (char *)(TPI->getTypeInfo()); - const char *IID = (char *)(IPI->getTypeInfo()); + const char *TID = (const char *)(TPI->getTypeInfo()); + const char *IID = (const char *)(IPI->getTypeInfo()); insertPass(TID, IID); } @@ -456,7 +456,8 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After Instruction Selection"); // Expand pseudo-instructions emitted by ISel. - addPass(&ExpandISelPseudosID); + if (addPass(&ExpandISelPseudosID)) + printAndVerify("After ExpandISelPseudos"); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -528,6 +529,10 @@ void TargetPassConfig::addMachineSSAOptimization() { // instructions dead. addPass(&OptimizePHIsID); + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. addPass(&LocalStackSlotAllocationID); diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 9099862..a795ac8 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SeenMoveImm = true; } else { Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + // optimizeExtInstr might have created new instructions after MI + // and before the already incremented MII. Adjust MII so that the + // next iteration sees the new instructions. 
+ MII = MI; + ++MII; if (SeenMoveImm) Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 7449ff5..d57bc73 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -240,6 +240,7 @@ void SchedulePostRATDList::exitRegion() { ScheduleDAGInstrs::exitRegion(); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dumpSchedule - dump the scheduled Sequence. void SchedulePostRATDList::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -249,6 +250,7 @@ void SchedulePostRATDList::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); @@ -298,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":BB#" << MBB->getNumber() << " ***\n"; } #endif @@ -488,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); BitVector killedRegs(TRI->getNumRegs()); - BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); @@ -529,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + if ((Reg == 0) || MRI.isReserved(Reg)) continue; bool kill = false; if (!killedRegs.test(Reg)) { @@ -564,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); - if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + if ((Reg == 0) || MRI.isReserved(Reg)) continue; LiveRegs.set(Reg); diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 34d075c..e4e18c3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -137,8 +137,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); bool Changed = false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index c791ffb..77554d6 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasAttribute(Attributes::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. 
- if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasAttribute(Attributes::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -221,13 +221,13 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // In Naked functions we aren't going to save any registers. - if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + if (Fn.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) return; std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { + if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); } diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 3a03807..8a49609 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -20,7 +20,6 @@ #include "VirtRegMap.h" #include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -273,7 +272,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, bool RABasic::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getName() << '\n'); MF = &mf; RegAllocBase::init(getAnalysis(), diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 6b3a48e..8892216 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -113,9 +113,11 @@ namespace { // PhysRegState - One of the RegState enums, or a virtreg. std::vector PhysRegState; - // UsedInInstr - BitVector of physregs that are used in the current - // instruction, and so cannot be allocated. - BitVector UsedInInstr; + typedef SparseSet UsedInInstrSet; + + // UsedInInstr - Set of physregs that are used in the current instruction, + // and so cannot be allocated. + UsedInInstrSet UsedInInstr; // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to @@ -173,7 +175,7 @@ namespace { unsigned VirtReg, unsigned Hint); LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - void spillAll(MachineInstr *MI); + void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); void addRetOperands(MachineBasicBlock *MBB); }; @@ -312,7 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, } /// spillAll - Spill all dirty virtregs without killing them. -void RAFast::spillAll(MachineInstr *MI) { +void RAFast::spillAll(MachineBasicBlock::iterator MI) { if (LiveVirtRegs.empty()) return; isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order @@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { PhysRegState[PhysReg] = regFree; // Fall through case regFree: - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); MO.setIsKill(); return; default: @@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) { "Instruction is not using a subregister of a reserved register"); // Leave the superregister in the working set. 
PhysRegState[Alias] = regFree; - UsedInInstr.set(Alias); + UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. - UsedInInstr.set(Alias); + UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); MO.setIsKill(); } @@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// reserved instead of allocated. void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) { + if (UsedInInstr.count(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } @@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; - if (UsedInInstr.test(Alias)) + if (UsedInInstr.count(Alias)) return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: @@ -509,7 +511,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint))) + !RC->contains(Hint) || !MRI->isAllocatable(Hint))) Hint = 0; // Take hint when possible. @@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // First try to find a completely free register. for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) { + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } @@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; - UsedInInstr.set(LRI->PhysReg); + UsedInInstr.insert(LRI->PhysReg); return LRI; } @@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; - UsedInInstr.set(LRI->PhysReg); + UsedInInstr.insert(LRI->PhysReg); return LRI; } @@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - UsedInInstr.set(*AI); + UsedInInstr.insert(*AI); if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } @@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } // Restore UsedInInstr to a state usable for allocating normal virtual uses. 
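The BitVector-to-SparseSet switch in this commit pays off in the reset-per-instruction pattern visible just below: clearing a sparse set costs time proportional to the handful of registers actually inserted, while resetting a BitVector touches a bit for every physical register in the target. A minimal sparse set, illustrating the idea only (this is not LLVM's SparseSet):

    #include <vector>

    struct TinySparseSet {
      std::vector<unsigned> Dense;    // members, in insertion order
      std::vector<unsigned> Sparse;   // Sparse[Reg] = index into Dense
      explicit TinySparseSet(unsigned Universe) : Sparse(Universe) {}
      bool count(unsigned Reg) const {
        unsigned I = Sparse[Reg];
        return I < Dense.size() && Dense[I] == Reg;
      }
      void insert(unsigned Reg) {
        if (count(Reg)) return;
        Sparse[Reg] = (unsigned)Dense.size();
        Dense.push_back(Reg);
      }
      void clear() { Dense.clear(); } // O(members), not O(universe)
    };

Stale Sparse[] entries are harmless because count() validates them against Dense, which is why clear() never has to touch the sparse array.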
- UsedInInstr.reset(); + UsedInInstr.clear(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; @@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); - UsedInInstr.set(Reg); + UsedInInstr.insert(Reg); } // Also mark PartialDefs as used to avoid reallocation. for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - UsedInInstr.set(PartialDefs[i]); + UsedInInstr.insert(PartialDefs[i]); } /// addRetOperand - ensure that a return instruction has an operand for each @@ -838,7 +840,7 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - if (RegClassInfo.isAllocatable(*I)) + if (MRI->isAllocatable(*I)) definePhysReg(MII, *I, regReserved); SmallVector VirtDead; @@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() { } // Track registers used by instruction. - UsedInInstr.reset(); + UsedInInstr.clear(); // First scan. // Mark physreg uses and early clobbers as used. @@ -954,6 +956,11 @@ void RAFast::AllocateBasicBlock() { bool hasPhysDefs = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) { + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + continue; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; @@ -970,7 +977,7 @@ void RAFast::AllocateBasicBlock() { } continue; } - if (!RegClassInfo.isAllocatable(Reg)) continue; + if (!MRI->isAllocatable(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -1016,11 +1023,13 @@ void RAFast::AllocateBasicBlock() { } } - MRI->addPhysRegsUsed(UsedInInstr); + for (UsedInInstrSet::iterator + I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) + MRI->setPhysRegUsed(*I); // Track registers defined by instruction - early clobbers and tied uses at // this point. - UsedInInstr.reset(); + UsedInInstr.clear(); if (hasEarlyClobbers) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -1030,7 +1039,7 @@ void RAFast::AllocateBasicBlock() { // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedInInstr.set(*AI); + UsedInInstr.insert(*AI); } } @@ -1058,7 +1067,7 @@ void RAFast::AllocateBasicBlock() { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!RegClassInfo.isAllocatable(Reg)) continue; + if (!MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? 
regFree : regReserved); continue; @@ -1080,7 +1089,9 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - MRI->addPhysRegsUsed(UsedInInstr); + for (UsedInInstrSet::iterator + I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) + MRI->setPhysRegUsed(*I); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); @@ -1110,8 +1121,7 @@ void RAFast::AllocateBasicBlock() { /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)Fn.getFunction())->getName() << '\n'); + << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); @@ -1119,7 +1129,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TII = TM->getInstrInfo(); MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); - UsedInInstr.resize(TRI->getNumRegs()); + UsedInInstr.clear(); + UsedInInstr.setUniverse(TRI->getNumRegs()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6ac5428..06f69c1e 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -24,7 +24,6 @@ #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" @@ -331,9 +330,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -509,7 +508,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. -/// @prarm IsHint True when PhysReg is VirtReg's preferred register. +/// @param IsHint True when PhysReg is VirtReg's preferred register. /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. 
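The MaxCost parameter documented above works as a running best: a candidate is only interesting if its eviction cost compares below the current value. A simplified model of such a cost, with fields named after the greedy allocator's bookkeeping (assumed, not quoted from it): breaking fewer register hints always wins, and ties fall back to the heaviest interfering spill weight.

    struct EvictionCost {
      unsigned BrokenHints;   // number of register hints broken
      float    MaxWeight;     // heaviest interfering spill weight
      EvictionCost() : BrokenHints(0), MaxWeight(0) {}
      bool operator<(const EvictionCost &O) const {
        if (BrokenHints != O.BrokenHints)
          return BrokenHints < O.BrokenHints;
        return MaxWeight < O.MaxWeight;
      }
    };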
@@ -1746,8 +1745,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << "********** Function: " << mf.getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index d0db26b..02ebce7 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -118,7 +118,6 @@ private: typedef std::vector AllowedSetMap; typedef std::pair RegPair; typedef std::map CoalesceMap; - typedef std::vector NodeVector; typedef std::set RegSet; @@ -192,7 +191,6 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - typedef std::vector LIVector; LiveIntervals *LIS = const_cast(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -209,8 +207,6 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, mri->setPhysRegUsed(Reg); } - BitVector reservedRegs = tri->getReservedRegs(*mf); - // Iterate over vregs. for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); vregItr != vregEnd; ++vregItr) { @@ -219,7 +215,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, LiveInterval *vregLI = &LIS->getInterval(vreg); // Record any overlaps with regmask operands. - BitVector regMaskOverlaps(tri->getNumRegs()); + BitVector regMaskOverlaps; LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); // Compute an initial allowed set for the current vreg. @@ -228,7 +224,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; - if (reservedRegs.test(preg)) + if (mri->isReserved(preg)) continue; // vregLI crosses a regmask operand that clobbers preg. @@ -358,7 +354,7 @@ std::auto_ptr PBQPBuilderWithCoalescing::build( loopInfo->getLoopDepth(mbb)); if (cp.isPhys()) { - if (!lis->isAllocatable(dst)) { + if (!mf->getRegInfo().isAllocatable(dst)) { continue; } @@ -433,6 +429,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired(); au.addPreserved(); au.addRequired(); + au.addPreserved(); //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); @@ -444,6 +441,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired(); au.addPreserved(); au.addRequired(); + au.addPreserved(); MachineFunctionPass::getAnalysisUsage(au); } @@ -556,7 +554,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { mri->freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); // Allocator main loop: // @@ -570,11 +568,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); +#ifndef NDEBUG const Function* func = mf->getFunction(); std::string fqn = func->getParent()->getModuleIdentifier() + "." + func->getName().str(); - (void)fqn; +#endif // If there are non-empty intervals allocate them using pbqp. 
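Returning to the regMaskOverlaps change earlier in this hunk: a register mask is an array of 32-bit words with one bit per physical register, and by LLVM's convention a set bit means the register is preserved, so a clear bit marks a clobber. A standalone reading of that convention; the allocation loop announced by the comment above continues below.

    #include <stdint.h>

    static bool clobberedByRegMask(const uint32_t *Mask, unsigned PhysReg) {
      return !(Mask[PhysReg / 32] & (1u << (PhysReg % 32)));
    }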
if (!vregsToAlloc.empty()) { diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 652bc30..805d235 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -15,8 +15,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { CalleeSaved = CSR; // Different reserved registers? - BitVector RR = TRI->getReservedRegs(*MF); - if (RR != Reserved) + const BitVector &RR = MF->getRegInfo().getReservedRegs(); + if (Reserved.size() != RR.size() || RR != Reserved) { Update = true; - Reserved = RR; + Reserved = RR; + } // Invalidate cached information from previous function. if (Update) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9906334..2538f10 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -55,6 +55,8 @@ STATISTIC(numCommutes , "Number of instruction commuting performed"); STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(NumInflated , "Number of register classes inflated"); +STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); +STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); static cl::opt EnableJoining("join-liveintervals", @@ -123,6 +125,9 @@ namespace { /// can use this information below to update aliases. bool joinIntervals(CoalescerPair &CP); + /// Attempt joining two virtual registers. Return true on success. + bool joinVirtRegs(CoalescerPair &CP); + /// Attempt joining with a reserved physreg. bool joinReservedPhysReg(CoalescerPair &CP); @@ -193,12 +198,6 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", char RegisterCoalescer::ID = 0; -static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { - if (!a) return b; - if (!b) return a; - return tri.composeSubRegIndices(a, b); -} - static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, unsigned &Src, unsigned &Dst, unsigned &SrcSub, unsigned &DstSub) { @@ -209,8 +208,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, SrcSub = MI->getOperand(1).getSubReg(); } else if (MI->isSubregToReg()) { Dst = MI->getOperand(0).getReg(); - DstSub = compose(tri, MI->getOperand(0).getSubReg(), - MI->getOperand(3).getImm()); + DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); } else @@ -349,7 +348,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (DstReg != Dst) return false; // Registers match, do the subregisters line up? - return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub); + return TRI.composeSubRegIndices(SrcIdx, SrcSub) == + TRI.composeSubRegIndices(DstIdx, DstSub); } } @@ -425,7 +425,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. 
MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); - if (!CP.isCoalescable(ACopyMI)) + // Don't allow any partial copies, even if isCoalescable() allows them. + if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) return false; // Get the LiveRange in IntB that this value number starts with. @@ -583,7 +584,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !NewDstMO.isKill()) + if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) return false; // Make sure there are no other definitions of IntB that would reach the @@ -849,8 +850,17 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // Update LiveDebugVariables. LDV->renameRegister(SrcReg, DstReg, SubIdx); + SmallPtrSet Visited; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { + // Each instruction can only be rewritten once because sub-register + // composition is not always idempotent. When SrcReg != DstReg, rewriting + // the UseMI operands removes them from the SrcReg use-def chain, but when + // SrcReg is DstReg we could encounter UseMI twice if it has multiple + // operands mentioning the virtual register. + if (SrcReg == DstReg && !Visited.insert(UseMI)) + continue; + SmallVector Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); @@ -890,7 +900,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is /// always beneficial. - if (!RegClassInfo.isReserved(CP.getDstReg())) { + if (!MRI->isReserved(CP.getDstReg())) { DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } @@ -1065,7 +1075,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { /// Attempt joining with a reserved physreg. bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); - assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register"); + assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS << '\n'); @@ -1102,347 +1112,797 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { return true; } -/// ComputeUltimateVN - Assuming we are going to join two live intervals, -/// compute what the resultant value numbers for each value in the input two -/// ranges will be. This is complicated by copies between the two which can -/// and will commonly cause multiple value numbers to be merged into one. -/// -/// VN is the value number that we're trying to resolve. InstDefiningValue -/// keeps track of the new InstDefiningValue assignment for the result -/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of -/// whether a value in this or other is a copy from the opposite set. -/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have -/// already been assigned. -/// -/// ThisFromOther[x] - If x is defined as a copy from the other interval, this -/// contains the value number the copy is from. 
-/// -static unsigned ComputeUltimateVN(VNInfo *VNI, - SmallVector &NewVNInfo, - DenseMap &ThisFromOther, - DenseMap &OtherFromThis, - SmallVector &ThisValNoAssignments, - SmallVector &OtherValNoAssignments) { - unsigned VN = VNI->id; - - // If the VN has already been computed, just return it. - if (ThisValNoAssignments[VN] >= 0) - return ThisValNoAssignments[VN]; - assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); - - // If this val is not a copy from the other val, then it must be a new value - // number in the destination. - DenseMap::iterator I = ThisFromOther.find(VNI); - if (I == ThisFromOther.end()) { - NewVNInfo.push_back(VNI); - return ThisValNoAssignments[VN] = NewVNInfo.size()-1; - } - VNInfo *OtherValNo = I->second; - - // Otherwise, this *is* a copy from the RHS. If the other side has already - // been computed, return it. - if (OtherValNoAssignments[OtherValNo->id] >= 0) - return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - - // Mark this value number as currently being computed, then ask what the - // ultimate value # of the other value is. - ThisValNoAssignments[VN] = -2; - unsigned UltimateVN = - ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, - OtherValNoAssignments, ThisValNoAssignments); - return ThisValNoAssignments[VN] = UltimateVN; -} +//===----------------------------------------------------------------------===// +// Interference checking and interval joining +//===----------------------------------------------------------------------===// +// +// In the easiest case, the two live ranges being joined are disjoint, and +// there is no interference to consider. It is quite common, though, to have +// overlapping live ranges, and we need to check if the interference can be +// resolved. +// +// The live range of a single SSA value forms a sub-tree of the dominator tree. +// This means that two SSA values overlap if and only if the def of one value +// is contained in the live range of the other value. As a special case, the +// overlapping values can be defined at the same index. +// +// The interference from an overlapping def can be resolved in these cases: +// +// 1. Coalescable copies. The value is defined by a copy that would become an +// identity copy after joining SrcReg and DstReg. The copy instruction will +// be removed, and the value will be merged with the source value. +// +// There can be several copies back and forth, causing many values to be +// merged into one. We compute a list of ultimate values in the joined live +// range as well as a mappings from the old value numbers. +// +// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI +// predecessors have a live out value. It doesn't cause real interference, +// and can be merged into the value it overlaps. Like a coalescable copy, it +// can be erased after joining. +// +// 3. Copy of external value. The overlapping def may be a copy of a value that +// is already in the other register. This is like a coalescable copy, but +// the live range of the source register must be trimmed after erasing the +// copy instruction: +// +// %src = COPY %ext +// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. +// +// 4. Clobbering undefined lanes. 
Vector registers are sometimes built by
+//    defining one lane at a time:
+//
+//      %dst:ssub0 = FOO
+//      %src = BAR
+//      %dst:ssub1 = COPY %src
+//
+//    The live range of %src overlaps the %dst value defined by FOO, but
+//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+//    which was undef anyway.
+//
+//    The value mapping is more complicated in this case. The final live range
+//    will have different value numbers for both FOO and BAR, but there is no
+//    simple mapping from old to new values. It may even be necessary to add
+//    new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+//    is live, but never read. This can happen because we don't compute
+//    individual live ranges per lane.
+//
+//      %dst = FOO
+//      %src = BAR
+//      %dst:ssub1 = COPY %src
+//
+//    This kind of interference is only resolved locally. If the clobbered
+//    lane value escapes the block, the join is aborted.
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+  LiveInterval &LI;
+
+  // Location of this register in the final joined register.
+  // Either CP.DstIdx or CP.SrcIdx.
+  unsigned SubIdx;
+
+  // Values that will be present in the final live range.
+  SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+  const CoalescerPair &CP;
+  LiveIntervals *LIS;
+  SlotIndexes *Indexes;
+  const TargetRegisterInfo *TRI;
+
+  // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+  // This is suitable for passing to LiveInterval::join().
+  SmallVector<int, 8> Assignments;
+
+  // Conflict resolution for overlapping values.
+  enum ConflictResolution {
+    // No overlap, simply keep this value.
+    CR_Keep,
+
+    // Merge this value into OtherVNI and erase the defining instruction.
+    // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+    // values.
+    CR_Erase,
+
+    // Merge this value into OtherVNI but keep the defining instruction.
+    // This is for the special case where OtherVNI is defined by the same
+    // instruction.
+    CR_Merge,
+
+    // Keep this value, and have it replace OtherVNI where possible. This
+    // complicates value mapping since OtherVNI maps to two different values
+    // before and after this def.
+    // Used when clobbering undefined or dead lanes.
+    CR_Replace,
+
+    // Unresolved conflict. Visit later when all values have been mapped.
+    CR_Unresolved,
+
+    // Unresolvable conflict. Abort the join.
+    CR_Impossible
+  };
-// Find out if we have something like
-// A = X
-// B = X
-// if so, we can pretend this is actually
-// A = X
-// B = A
-// which allows us to coalesce A and B.
-// VNI is the definition of B. LR is the life range of A that includes
-// the slot just before B. If we return true, we add "B = X" to DupCopies.
-// This implies that A dominates B.
-static bool RegistersDefinedFromSameValue(LiveIntervals &li,
-                                          const TargetRegisterInfo &tri,
-                                          CoalescerPair &CP,
-                                          VNInfo *VNI,
-                                          VNInfo *OtherVNI,
-                                          SmallVector<MachineInstr*, 8> &DupCopies) {
-  // FIXME: This is very conservative. For example, we don't handle
-  // physical registers.
-
-  MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
-
-  if (!MI || CP.isPartial() || CP.isPhys())
-    return false;
+  // Per-value info for LI. The lane bit masks are all relative to the final
+  // joined register, so they can be compared directly between SrcReg and
+  // DstReg.
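Before the struct itself, the lane-mask reasoning described above can be made concrete with plain bitmasks; the mask values here are hypothetical, but the arithmetic is exactly the case-4 situation: the copy writes only a lane that held no defined value, so nothing live is clobbered.

    #include <cassert>

    int main() {
      const unsigned LaneSsub0 = 0x1, LaneSsub1 = 0x2; // made-up lane masks
      unsigned OtherValidLanes = LaneSsub0; // FOO defined only ssub0
      unsigned WriteLanes = LaneSsub1;      // COPY writes only ssub1
      assert((WriteLanes & OtherValidLanes) == 0); // resolvable: CR_Replace
      return 0;
    }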
+  struct Val {
+    ConflictResolution Resolution;
-  unsigned A = CP.getDstReg();
-  if (!TargetRegisterInfo::isVirtualRegister(A))
-    return false;
+    // Lanes written by this def, 0 for unanalyzed values.
+    unsigned WriteLanes;
-  unsigned B = CP.getSrcReg();
-  if (!TargetRegisterInfo::isVirtualRegister(B))
-    return false;
+    // Lanes with defined values in this register. Other lanes are undef and
+    // safe to clobber.
+    unsigned ValidLanes;
-  MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
-  if (!OtherMI)
-    return false;
+    // Value in LI being redefined by this def.
+    VNInfo *RedefVNI;
-  if (MI->isImplicitDef()) {
-    DupCopies.push_back(MI);
-    return true;
-  } else {
-    if (!MI->isFullCopy())
-      return false;
-    unsigned Src = MI->getOperand(1).getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(Src))
-      return false;
-    if (!OtherMI->isFullCopy())
-      return false;
-    unsigned OtherSrc = OtherMI->getOperand(1).getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
-      return false;
+    // Value in the other live range that overlaps this def, if any.
+    VNInfo *OtherVNI;
-    if (Src != OtherSrc)
-      return false;
+    // Is this value an IMPLICIT_DEF?
+    bool IsImplicitDef;
-    // If the copies use two different value numbers of X, we cannot merge
-    // A and B.
-    LiveInterval &SrcInt = li.getInterval(Src);
-    // getVNInfoBefore returns NULL for undef copies. In this case, the
-    // optimization is still safe.
-    if (SrcInt.getVNInfoBefore(OtherVNI->def) !=
-        SrcInt.getVNInfoBefore(VNI->def))
-      return false;
+    // True when the live range of this value will be pruned because of an
+    // overlapping CR_Replace value in the other live range.
+    bool Pruned;
-    DupCopies.push_back(MI);
-    return true;
-  }
-}
+    // True once Pruned above has been computed.
+    bool PrunedComputed;
-/// joinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
-bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
-  // Handle physreg joins separately.
-  if (CP.isPhys())
-    return joinReservedPhysReg(CP);
+    Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+            RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
+            PrunedComputed(false) {}
-  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
-  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
-               << '\n');
+    bool isAnalyzed() const { return WriteLanes != 0; }
+  };
-  // Compute the final value assignment, assuming that the live ranges can be
-  // coalesced.
-  SmallVector<int, 16> LHSValNoAssignments;
-  SmallVector<int, 16> RHSValNoAssignments;
-  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
-  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
-  SmallVector<VNInfo*, 16> NewVNInfo;
+  // One entry per value number in LI.
+  SmallVector<Val, 8> Vals;
+
+  unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
+  VNInfo *stripCopies(VNInfo *VNI);
+  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+  void computeAssignment(unsigned ValNo, JoinVals &Other);
+  bool taintExtent(unsigned, unsigned, JoinVals&,
+                   SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+  bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+  JoinVals(LiveInterval &li, unsigned subIdx,
+           SmallVectorImpl<VNInfo*> &newVNInfo,
+           const CoalescerPair &cp,
+           LiveIntervals *lis,
+           const TargetRegisterInfo *tri)
+    : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
+      Indexes(LIS->getSlotIndexes()), TRI(tri),
+      Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+  {}
+
+  /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+  /// Returns false if any conflicts were impossible to resolve.
+  bool mapValues(JoinVals &Other);
+
+  /// Try to resolve conflicts that require all values to be mapped.
+  /// Returns false if any conflicts were impossible to resolve.
+  bool resolveConflicts(JoinVals &Other);
+
+  /// Prune the live range of values in Other.LI where they would conflict with
+  /// CR_Replace values in LI. Collect end points for restoring the live range
+  /// after joining.
+  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
+  /// Erase any machine instructions that have been coalesced away.
+  /// Add erased instructions to ErasedInstrs.
+  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+  /// the erased instrs.
+  void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+                   SmallVectorImpl<unsigned> &ShrinkRegs);
+
+  /// Get the value assignments suitable for passing to LiveInterval::join.
+  const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+/// Compute the bitmask of lanes actually written by DefMI.
+/// Set Redef if there are any partial register definitions that depend on the
+/// previous value of the register.
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+  unsigned L = 0;
+  for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
+    if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+      continue;
+    L |= TRI->getSubRegIndexLaneMask(
+           TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
+    if (MO->readsReg())
+      Redef = true;
+  }
+  return L;
+}
-  SmallVector<MachineInstr*, 8> DupCopies;
-  SmallVector<MachineInstr*, 8> DeadCopies;
+/// Find the ultimate value that VNI was copied from.
+VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+  while (!VNI->isPHIDef()) {
+    MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+    assert(MI && "No defining instruction");
+    if (!MI->isFullCopy())
+      break;
+    unsigned Reg = MI->getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      break;
+    LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+    if (!LRQ.valueIn())
+      break;
+    VNI = LRQ.valueIn();
+  }
+  return VNI;
+}
-  LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
-  DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
-               << '\n');
+/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+/// Return a conflict resolution when possible, but leave the hard cases as
+/// CR_Unresolved.
+/// Recursively calls computeAssignment() on this and Other, guaranteeing that
+/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+/// The recursion always goes upwards in the dominator tree, making loops
+/// impossible.
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+  Val &V = Vals[ValNo];
+  assert(!V.isAnalyzed() && "Value has already been analyzed!");
+  VNInfo *VNI = LI.getValNumInfo(ValNo);
+  if (VNI->isUnused()) {
+    V.WriteLanes = ~0u;
+    return CR_Keep;
+  }
-  // Loop over the value numbers of the LHS, seeing if any are defined from
-  // the RHS.
-  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    if (VNI->isUnused() || VNI->isPHIDef())
-      continue;
-    MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
-    assert(MI && "Missing def");
-    if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
-      continue;
+  // Get the instruction defining this value, compute the lanes written.
+  const MachineInstr *DefMI = 0;
+  if (VNI->isPHIDef()) {
+    // Conservatively assume that all lanes in a PHI are valid.
+    V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+  } else {
+    DefMI = Indexes->getInstructionFromIndex(VNI->def);
+    bool Redef = false;
+    V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+    // If this is a read-modify-write instruction, there may be more valid
+    // lanes than the ones written by this instruction.
+    // This only covers partial redef operands. DefMI may have normal use
+    // operands reading the register. They don't contribute valid lanes.
+    //
+    // This adds ssub1 to the set of valid lanes in %src:
+    //
+    //   %src:ssub1<def> = FOO
+    //
+    // This leaves only ssub1 valid, making any other lanes undef:
+    //
+    //   %src:ssub1<def,read-undef> = FOO %src:ssub2
+    //
+    // The <read-undef> flag on the def operand means that old lane values are
+    // not important.
+    if (Redef) {
+      V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+      assert(V.RedefVNI && "Instruction is reading nonexistent value");
+      computeAssignment(V.RedefVNI->id, Other);
+      V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+    }
-    // Figure out the value # from the RHS.
-    VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
-    // The copy could be to an aliased physreg.
-    if (!OtherVNI)
-      continue;
+    // An IMPLICIT_DEF writes undef values.
+    if (DefMI->isImplicitDef()) {
+      V.IsImplicitDef = true;
+      V.ValidLanes &= ~V.WriteLanes;
+    }
+  }
-    // DstReg is known to be a register in the LHS interval. If the src is
-    // from the RHS interval, we can use its value #.
-    if (CP.isCoalescable(MI))
-      DeadCopies.push_back(MI);
-    else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
-                                            DupCopies))
-      continue;
+  // Find the value in Other that overlaps VNI->def, if any.
+  LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+  // It is possible that both values are defined by the same instruction, or
+  // the values are PHIs defined in the same block. When that happens, the two
+  // values should be merged into one, but not into any preceding value.
+  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+    // One value stays, the other is merged. Keep the earlier one, or the first
+    // one we see.
+    if (OtherVNI->def < VNI->def)
+      Other.computeAssignment(OtherVNI->id, *this);
+    else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+      // This is an early-clobber def overlapping a live-in value in the other
+      // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn(); + return CR_Impossible; + } + V.OtherVNI = OtherVNI; + Val &OtherV = Other.Vals[OtherVNI->id]; + // Keep this value, check for conflicts when analyzing OtherVNI. + if (!OtherV.isAnalyzed()) + return CR_Keep; + // Both sides have been analyzed now. + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Merge; + if (V.ValidLanes & OtherV.ValidLanes) + // Overlapping lanes can't be resolved. + return CR_Impossible; + else + return CR_Merge; + } - LHSValsDefinedFromRHS[VNI] = OtherVNI; + // No simultaneous def. Is Other live at the def? + V.OtherVNI = OtherLRQ.valueIn(); + if (!V.OtherVNI) + // No overlap, no conflict. + return CR_Keep; + + assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); + + // We have overlapping values, or possibly a kill of Other. + // Recursively compute assignments up the dominator tree. + Other.computeAssignment(V.OtherVNI->id, *this); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Replace; + + // Check for simple erasable conflicts. + if (DefMI->isImplicitDef()) + return CR_Erase; + + // Include the non-conflict where DefMI is a coalescable copy that kills + // OtherVNI. We still want the copy erased and value numbers merged. + if (CP.isCoalescable(DefMI)) { + // Some of the lanes copied from OtherVNI may be undef, making them undef + // here too. + V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; + return CR_Erase; } - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->isPHIDef()) - continue; - MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); - assert(MI && "Missing def"); - if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? - continue; + // This may not be a real conflict if DefMI simply kills Other and defines + // VNI. + if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) + return CR_Keep; + + // Handle the case where VNI and OtherVNI can be proven to be identical: + // + // %other = COPY %ext + // %this = COPY %ext <-- Erase this copy + // + if (DefMI->isFullCopy() && !CP.isPartial() && + stripCopies(VNI) == stripCopies(V.OtherVNI)) + return CR_Erase; + + // If the lanes written by this instruction were all undef in OtherVNI, it is + // still safe to join the live ranges. This can't be done with a simple value + // mapping, though - OtherVNI will map to multiple values: + // + // 1 %dst:ssub0 = FOO <-- OtherVNI + // 2 %src = BAR <-- VNI + // 3 %dst:ssub1 = COPY %src <-- Eliminate this copy. + // 4 BAZ %dst + // 5 QUUX %src + // + // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace + // handles this complex value mapping. + if ((V.WriteLanes & OtherV.ValidLanes) == 0) + return CR_Replace; + + // If the other live range is killed by DefMI and the live ranges are still + // overlapping, it must be because we're looking at an early clobber def: + // + // %dst = ASM %src + // + // In this case, it is illegal to merge the two live ranges since the early + // clobber def would clobber %src before it was read. 
+ if (OtherLRQ.isKill()) { + // This case where the def doesn't overlap the kill is handled above. + assert(VNI->def.isEarlyClobber() && + "Only early clobber defs can overlap a kill"); + return CR_Impossible; + } - // Figure out the value # from the LHS. - VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def); - // The copy could be to an aliased physreg. - if (!OtherVNI) - continue; + // VNI is clobbering live lanes in OtherVNI, but there is still the + // possibility that no instructions actually read the clobbered lanes. + // If we're clobbering all the lanes in OtherVNI, at least one must be read. + // Otherwise Other.LI wouldn't be live here. + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + return CR_Impossible; + + // We need to verify that no instructions are reading the clobbered lanes. To + // save compile time, we'll only check that locally. Don't allow the tainted + // value to escape the basic block. + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) + return CR_Impossible; + + // There are still some things that could go wrong besides clobbered lanes + // being read, for example OtherVNI may be only partially redefined in MBB, + // and some clobbered lanes could escape the block. Save this analysis for + // resolveConflicts() when all values have been mapped. We need to know + // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute + // that now - the recursive analyzeValue() calls must go upwards in the + // dominator tree. + return CR_Unresolved; +} - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #. - if (CP.isCoalescable(MI)) - DeadCopies.push_back(MI); - else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, - DupCopies)) - continue; +/// Compute the value assignment for ValNo in LI. +/// This may be called recursively by analyzeValue(), but never for a ValNo on +/// the stack. +void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.isAnalyzed()) { + // Recursion should always move up the dominator tree, so ValNo is not + // supposed to reappear before it has been assigned. + assert(Assignments[ValNo] != -1 && "Bad recursion?"); + return; + } + switch ((V.Resolution = analyzeValue(ValNo, Other))) { + case CR_Erase: + case CR_Merge: + // Merge this ValNo into OtherVNI. + assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); + assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); + Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; + DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' + << LI.getValNumInfo(ValNo)->def << " into " + << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' + << V.OtherVNI->def << " --> @" + << NewVNInfo[Assignments[ValNo]]->def << '\n'); + break; + case CR_Replace: + case CR_Unresolved: + // The other value is going to be pruned if this join is successful. + assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); + Other.Vals[V.OtherVNI->id].Pruned = true; + // Fall through. + default: + // This value number needs to go in the final joined live range. 
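The default case below implements the mapping sketched here in standalone form: every old value number receives a slot in the final table, either shared with the value it merges into or freshly appended.

    #include <vector>

    // MergeWith is the already-assigned value to merge into, or -1 to keep.
    static int assignSlot(std::vector<int> &Assignments, int &NumNewVals,
                          int ValNo, int MergeWith) {
      if (MergeWith >= 0)
        Assignments[ValNo] = Assignments[MergeWith]; // share the slot
      else
        Assignments[ValNo] = NumNewVals++;           // fresh slot
      return Assignments[ValNo];
    }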
+ Assignments[ValNo] = NewVNInfo.size(); + NewVNInfo.push_back(LI.getValNumInfo(ValNo)); + break; + } +} - RHSValsDefinedFromLHS[VNI] = OtherVNI; +bool JoinVals::mapValues(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + computeAssignment(i, Other); + if (Vals[i].Resolution == CR_Impossible) { + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + return false; + } } + return true; +} - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); +/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute +/// the extent of the tainted lanes in the block. +/// +/// Multiple values in Other.LI can be affected since partial redefinitions can +/// preserve previously tainted lanes. +/// +/// 1 %dst = VLOAD <-- Define all lanes in %dst +/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 +/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 +/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read +/// +/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) +/// entry to TaintedVals. +/// +/// Returns false if the tainted lanes extend beyond the basic block. +bool JoinVals:: +taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, + SmallVectorImpl > &TaintExtent) { + VNInfo *VNI = LI.getValNumInfo(ValNo); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); + + // Scan Other.LI from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LI.find(VNI->def); + assert(OtherI != Other.LI.end() && "No conflict?"); + do { + // OtherI is pointing to a tainted value. Abort the join if the tainted + // lanes escape the block. + SlotIndex End = OtherI->end; + if (End >= MBBEnd) { + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); + return false; + } + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start + << " to " << End << '\n'); + // A dead def is not a problem. + if (End.isDead()) + break; + TaintExtent.push_back(std::make_pair(End, TaintedLanes)); + + // Check for another def in the MBB. + if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + break; + + // Lanes written by the new def are no longer tainted. + const Val &OV = Other.Vals[OtherI->valno->id]; + TaintedLanes &= ~OV.WriteLanes; + if (!OV.RedefVNI) + break; + } while (TaintedLanes); + return true; +} - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) +/// Return true if MI uses any of the given Lanes from Reg. +/// This does not include partial redefinitions of Reg. 
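+/// For example (hypothetical indices), a use of %reg:ssub0 reads only the
+/// lanes in getSubRegIndexLaneMask(composeSubRegIndices(SubIdx, ssub0)), so
+/// querying Lanes that cover just ssub1 returns false for that instruction.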
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) { + if (MI->isDebugValue()) + return false; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); - } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + if (!MO->readsReg()) continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; + if (Lanes & TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) + return true; + } + return false; +} + +bool JoinVals::resolveConflicts(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + if (V.Resolution != CR_Unresolved) continue; - } + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + ++NumLaneConflicts; + assert(V.OtherVNI && "Inconsistent conflict resolution."); + VNInfo *VNI = LI.getValNumInfo(i); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the + // join, those lanes will be tainted with a wrong value. Get the extent of + // the tainted lanes. + unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector, 8> TaintExtent; + if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) + // Tainted lanes would extend beyond the basic block. + return false; - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } + assert(!TaintExtent.empty() && "There should be at least one conflict."); - // Armed with the mappings of LHS/RHS values to ultimate values, walk the - // interval lists to see if these intervals are coalescable. - LiveInterval::const_iterator I = LHS.begin(); - LiveInterval::const_iterator IE = LHS.end(); - LiveInterval::const_iterator J = RHS.begin(); - LiveInterval::const_iterator JE = RHS.end(); - - // Collect interval end points that will no longer be kills. - SmallVector LHSOldKills; - SmallVector RHSOldKills; - - // Skip ahead until the first place of potential sharing. - if (I != IE && J != JE) { - if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; + // Now look at the instructions from VNI->def to TaintExtent (inclusive). + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + MachineBasicBlock::iterator MI = MBB->begin(); + if (!VNI->isPHIDef()) { + MI = Indexes->getInstructionFromIndex(VNI->def); + // No need to check the instruction defining VNI for reads. + ++MI; } - } - - while (I != IE && J != JE) { - // Determine if these two live ranges overlap. - // If so, check value # info to determine if they are really different. - if (I->end > J->start && J->end > I->start) { - // If the live range overlap will map to the same value number in the - // result liverange, we can still coalesce them. If not, we can't. 
- if (LHSValNoAssignments[I->valno->id] != - RHSValNoAssignments[J->valno->id]) + assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && + "Interference ends on VNI->def. Should have been handled earlier"); + MachineInstr *LastMI = + Indexes->getInstructionFromIndex(TaintExtent.front().first); + assert(LastMI && "Range must end at a proper instruction"); + unsigned TaintNum = 0; + for(;;) { + assert(MI != MBB->end() && "Bad LastMI"); + if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); return false; - - // Extended live ranges should no longer be killed. - if (!I->end.isBlock() && I->end < J->end) - if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end)) - LHSOldKills.push_back(MI); - if (!J->end.isBlock() && J->end < I->end) - if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end)) - RHSOldKills.push_back(MI); + } + // LastMI is the last instruction to use the current value. + if (&*MI == LastMI) { + if (++TaintNum == TaintExtent.size()) + break; + LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); + assert(LastMI && "Range must end at a proper instruction"); + TaintedLanes = TaintExtent[TaintNum].second; + } + ++MI; } - if (I->end < J->end) - ++I; - else - ++J; - } - - // Clear kill flags where live ranges are extended. - while (!LHSOldKills.empty()) - LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); - while (!RHSOldKills.empty()) - RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI); - - if (LHSValNoAssignments.empty()) - LHSValNoAssignments.push_back(-1); - if (RHSValNoAssignments.empty()) - RHSValNoAssignments.push_back(-1); - - // Now erase all the redundant copies. - for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) { - MachineInstr *MI = DeadCopies[i]; - if (!ErasedInstrs.insert(MI)) - continue; - DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI) - << '\t' << *MI); - LIS->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); + // The tainted lanes are unused. + V.Resolution = CR_Replace; + ++NumLaneResolves; } + return true; +} - SmallVector SourceRegisters; - for (SmallVector::iterator I = DupCopies.begin(), - E = DupCopies.end(); I != E; ++I) { - MachineInstr *MI = *I; - if (!ErasedInstrs.insert(MI)) - continue; +// Determine if ValNo is a copy of a value number in LI or Other.LI that will +// be pruned: +// +// %dst = COPY %src +// %src = COPY %dst <-- This value to be pruned. +// %dst = COPY %src <-- This value is a copy of a pruned value. +// +bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.Pruned || V.PrunedComputed) + return V.Pruned; + + if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) + return V.Pruned; + + // Follow copies up the dominator tree and check if any intermediate value + // has been pruned. + V.PrunedComputed = true; + V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); + return V.Pruned; +} - // If MI is a copy, then we have pretended that the assignment to B in - // A = X - // B = X - // was actually a copy from A. Now that we decided to coalesce A and B, - // transform the code into - // A = X - // In the case of the implicit_def, we just have to remove it. 
-  if (!MI->isImplicitDef()) {
-      unsigned Src = MI->getOperand(1).getReg();
-      SourceRegisters.push_back(Src);
+void JoinVals::pruneValues(JoinVals &Other,
+                           SmallVectorImpl<SlotIndex> &EndPoints) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    SlotIndex Def = LI.getValNumInfo(i)->def;
+    switch (Vals[i].Resolution) {
+    case CR_Keep:
+      break;
+    case CR_Replace: {
+      // This value takes precedence over the value in Other.LI.
+      LIS->pruneValue(&Other.LI, Def, &EndPoints);
+      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+      // instructions are only inserted to provide a live-out value for PHI
+      // predecessors, so the instruction should simply go away once its value
+      // has been replaced.
+      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+      bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
+      if (!Def.isBlock()) {
+        // Remove <def,read-undef> flags. This def is now a partial redef.
+        // Also remove <def,dead> flags since the joined live range will
+        // continue past this instruction.
+        for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+             MO.isValid(); ++MO)
+          if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+            MO->setIsUndef(EraseImpDef);
+            MO->setIsDead(false);
+          }
+        // This value will reach instructions below, but we need to make sure
+        // the live range also reaches the instruction at Def.
+        if (!EraseImpDef)
+          EndPoints.push_back(Def);
+      }
+      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+                   << ": " << Other.LI << '\n');
+      break;
+    }
+    case CR_Erase:
+    case CR_Merge:
+      if (isPrunedValue(i, Other)) {
+        // This value is ultimately a copy of a pruned value in LI or Other.LI.
+        // We can no longer trust the value mapping computed by
+        // computeAssignment(), the value that was originally copied could have
+        // been replaced.
+        LIS->pruneValue(&LI, Def, &EndPoints);
+        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+                     << Def << ": " << LI << '\n');
+      }
+      break;
+    case CR_Unresolved:
+    case CR_Impossible:
+      llvm_unreachable("Unresolved conflicts");
    }
-    LIS->RemoveMachineInstrFromMaps(MI);
-    MI->eraseFromParent();
  }
+}

-  // If B = X was the last use of X in a liverange, we have to shrink it now
-  // that B = X is gone.
-  for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
-         E = SourceRegisters.end(); I != E; ++I) {
-    LIS->shrinkToUses(&LIS->getInterval(*I));
+void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+                           SmallVectorImpl<unsigned> &ShrinkRegs) {
+  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+    // Get the def location before markUnused() below invalidates it.
+    SlotIndex Def = LI.getValNumInfo(i)->def;
+    switch (Vals[i].Resolution) {
+    case CR_Keep:
+      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+      // longer. The IMPLICIT_DEF instructions are only inserted by
+      // PHIElimination to guarantee that all PHI predecessors have a value.
+      if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+        break;
+      // Remove value number i from LI. Note that this VNInfo is still present
+      // in NewVNInfo, so it will appear as an unused value number in the final
+      // joined interval.
+      LI.getValNumInfo(i)->markUnused();
+      LI.removeValNo(LI.getValNumInfo(i));
+      DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+      // FALL THROUGH.
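+      // (The fall-through is intentional: a pruned IMPLICIT_DEF value is
+      // erased below exactly like a CR_Erase value, since its defining
+      // instruction no longer contributes anything to the joined range.)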
+
+    case CR_Erase: {
+      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+      assert(MI && "No instruction to erase");
+      if (MI->isCopy()) {
+        unsigned Reg = MI->getOperand(1).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+          ShrinkRegs.push_back(Reg);
+      }
+      ErasedInstrs.insert(MI);
+      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+      LIS->RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+      break;
+    }
+    default:
+      break;
+    }
+  }
+}
+
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+  SmallVector<VNInfo*, 16> NewVNInfo;
+  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+  JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
+  JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
+
+  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+               << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+               << '\n');
+
+  // First compute NewVNInfo and the simple value mappings.
+  // Detect impossible conflicts early.
+  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+    return false;
+
+  // Some conflicts can only be resolved after all values have been mapped.
+  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+    return false;

-  // If we get here, we know that we can coalesce the live ranges.  Ask the
-  // intervals to coalesce themselves now.
-  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+  // All clear, the live ranges can be merged.
+
+  // The merging algorithm in LiveInterval::join() can't handle conflicting
+  // value mappings, so we need to remove any live ranges that overlap a
+  // CR_Replace resolution. Collect a set of end points that can be used to
+  // restore the live range after joining.
+  SmallVector<SlotIndex, 8> EndPoints;
+  LHSVals.pruneValues(RHSVals, EndPoints);
+  RHSVals.pruneValues(LHSVals, EndPoints);
+
+  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+  // registers to require trimming.
+  SmallVector<unsigned, 8> ShrinkRegs;
+  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+  while (!ShrinkRegs.empty())
+    LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+  // Join RHS into LHS.
+  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
           MRI);
+
+  // Kill flags are going to be wrong if the live ranges were overlapping.
+  // Eventually, we should simply clear all kill flags when computing live
+  // ranges. They are reinserted after register allocation.
+  MRI->clearKillFlags(LHS.reg);
+  MRI->clearKillFlags(RHS.reg);
+
+  if (EndPoints.empty())
+    return true;
+
+  // Recompute the parts of the live range we had to remove because of
+  // CR_Replace conflicts.
+  DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+               << " points: " << LHS << '\n');
+  LIS->extendToIndices(&LHS, EndPoints);
  return true;
}

+/// joinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+  return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
namespace {
  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
  // depth of the basic block (the unsigned), and then on the MBB number.
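The overall flow above can be summarized with a minimal standalone sketch,
assuming two sides that both map and resolve cleanly; every function here is
an illustrative stub mirroring the methods joinVirtRegs() calls, not LLVM's
API:

    // join_phases.cpp - compile with: c++ join_phases.cpp
    #include <cstdio>

    // Illustrative stubs only; the real work happens in JoinVals and
    // LiveIntervals above.
    static bool mapValues()        { return true; } // analyzeValue/computeAssignment
    static bool resolveConflicts() { return true; } // per-lane CR_Unresolved checks
    static void pruneValues()      {}               // clear ranges under CR_Replace
    static void eraseInstrs()      {}               // drop dead COPY/IMPLICIT_DEF
    static void joinRanges()       {}               // LiveInterval::join + assignments
    static void restoreLiveness()  {}               // extendToIndices over end points

    int main() {
      // Nothing is modified until both mapping and conflict resolution
      // succeed, so a rejected join leaves the two registers untouched.
      if (!mapValues() || !resolveConflicts()) {
        std::puts("join rejected");
        return 1;
      }
      pruneValues();
      eraseInstrs();
      joinRanges();
      restoreLiveness();
      std::puts("join succeeded");
      return 0;
    }
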
@@ -1564,8 +2024,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { Loops = &getAnalysis(); DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 8a6df98..47c3df1 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -63,6 +63,13 @@ namespace llvm { : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// Create a CoalescerPair representing a virtreg-to-physreg copy. + /// No need to call setRegisters(). + CoalescerPair(unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &tri) + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// setRegisters - set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. bool setRegisters(const MachineInstr*); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 43448c8..543c426 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -63,7 +63,8 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC, decreaseSetPressure(MaxSetPressure, RC, TRI); } -void RegisterPressure::dump(const TargetRegisterInfo *TRI) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -78,6 +79,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) { << '\n'; } } +#endif /// Increase the current pressure as impacted by these physical registers and /// bump the high water mark if needed. @@ -320,10 +322,8 @@ struct RegisterOperands { if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end()) DeadDefs.push_back(MO.getReg()); } - else { - if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) - Defs.push_back(MO.getReg()); - } + else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) + Defs.push_back(MO.getReg()); } } }; @@ -335,7 +335,7 @@ static void collectOperands(const MachineInstr *MI, PhysRegOperands &PhysRegOpers, VirtRegOperands &VirtRegOpers, const TargetRegisterInfo *TRI, - const RegisterClassInfo *RCI) { + const MachineRegisterInfo *MRI) { for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) { const MachineOperand &MO = *OperI; if (!MO.isReg() || !MO.getReg()) @@ -343,7 +343,7 @@ static void collectOperands(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegOpers.collect(MO, TRI); - else if (RCI->isAllocatable(MO.getReg())) + else if (MRI->isAllocatable(MO.getReg())) PhysRegOpers.collect(MO, TRI); } // Remove redundant physreg dead defs. @@ -449,7 +449,7 @@ bool RegPressureTracker::recede() { PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); // Boost pressure for all dead defs together. 
increasePhysRegPressure(PhysRegOpers.DeadDefs); @@ -522,7 +522,7 @@ bool RegPressureTracker::advance() { PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); // Kill liveness at last uses. for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { @@ -664,7 +664,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); // Boost max pressure for all dead defs together. // Since CurrSetPressure and MaxSetPressure @@ -674,9 +674,16 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { decreaseVirtRegPressure(VirtRegOpers.DeadDefs); // Kill liveness at live defs. - decreasePhysRegPressure(PhysRegOpers.Defs); - decreaseVirtRegPressure(VirtRegOpers.Defs); - + for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Defs[i]; + if (!findReg(Reg, false, PhysRegOpers.Uses, TRI)) + decreasePhysRegPressure(PhysRegOpers.Defs); + } + for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Defs[i]; + if (!findReg(Reg, true, VirtRegOpers.Uses, TRI)) + decreaseVirtRegPressure(VirtRegOpers.Defs); + } // Generate liveness for uses. for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { unsigned Reg = PhysRegOpers.Uses[i]; @@ -750,7 +757,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index d673794..5ec6564 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { KillRegs.resize(NumPhysRegs); DefRegs.resize(NumPhysRegs); - // Create reserved registers bitvector. - ReservedRegs = TRI->getReservedRegs(MF); - // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); @@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { used = RegsAvailable; used.flip(); if (includeReserved) - used |= ReservedRegs; + used |= MRI->getReservedRegs(); else - used.reset(ReservedRegs); + used.reset(MRI->getReservedRegs()); } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 752f8e4..9a65071 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -279,6 +279,7 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. 
void SUnit::dump(const ScheduleDAG *G) const { @@ -336,6 +337,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } dbgs() << "\n"; } +#endif #ifndef NDEBUG /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 9c1dba3..a4d4a93 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" @@ -30,6 +31,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -44,14 +46,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineDominatorTree &mdt, bool IsPostRAFlag, LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), - IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false), - LoopRegs(MDT), FirstDbgValue(0) { + : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), + IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); + + const TargetSubtargetInfo &ST = TM.getSubtarget(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); } /// getUnderlyingObjectFromInt - This is the function that does the work of @@ -68,7 +71,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { // object. We don't have to worry about the case where the // object address is somehow being computed by the multiply, // because our callers only care when the result is an - // identifibale object. + // identifiable object. if (U->getOpcode() != Instruction::Add || (!isa(U->getOperand(1)) && Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) @@ -135,10 +138,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { BB = bb; - LoopRegs.Deps.clear(); - if (MachineLoop *ML = MLI.getLoopFor(BB)) - if (BB == ML->getLoopLatch()) - LoopRegs.VisitLoop(ML); } void ScheduleDAGInstrs::finishBlock() { @@ -174,9 +173,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, EndIndex = endcount; MISUnitMap.clear(); - // Check to see if the scheduler cares about latencies. 
- UnitLatencies = forceUnitLatencies(); - ScheduleDAG::clearDAG(); } @@ -209,7 +205,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); addVRegUseDeps(&ExitSU, i); @@ -225,59 +221,44 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); } } } /// MO is an operand of SU's instruction that defines a physical register. Add /// data dependencies from SU to any uses of the physical register. -void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, - const MachineOperand &MO) { +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. const TargetSubtargetInfo &ST = TM.getSubtarget(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - unsigned DataLatency = SU->Latency; for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector &UseList = Uses[*Alias]; + std::vector &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; + SUnit *UseSU = UseList[i].SU; if (UseSU == SU) continue; - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies && - UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); - assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); - if (RegUseIndex >= 0 && - (UseMI->mayLoad() || UseMI->mayStore()) && - (unsigned)RegUseIndex < UseMCID.getNumOperands() && - UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. - SDep dep(SU, SDep::Data, LDataLatency, *Alias); - if (!UnitLatencies) { - unsigned Latency = computeOperandLatency(SU, UseSU, dep); - dep.setLatency(Latency); - - ST.adjustSchedDependency(SU, UseSU, dep); - } + + SDep dep(SU, SDep::Data, *Alias); + + // Adjust the dependence latency using operand def/use information, + // then allow the target to perform its own adjustments. + int UseOp = UseList[i].OpIdx; + MachineInstr *RegUse = UseOp < 0 ? 
0 : UseSU->getInstr(); + dep.setLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/false)); + dep.setMinLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/true)); + + ST.adjustSchedDependency(SU, UseSU, dep); UseSU->addPred(dep); } } @@ -301,20 +282,23 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector &DefList = Defs[*Alias]; + std::vector &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + SUnit *DefSU = DefList[i].SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(*Alias))) { if (Kind == SDep::Anti) - DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias)); + DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { - unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx, - DefSU->getInstr()); - DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias)); + SDep Dep(SU, Kind, /*Reg=*/*Alias); + unsigned OutLatency = + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); + Dep.setMinLatency(OutLatency); + Dep.setLatency(OutLatency); + DefSU->addPred(Dep); } } } @@ -324,61 +308,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(SU); + Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); } else { - addPhysRegDataDeps(SU, MO); + addPhysRegDataDeps(SU, OperIdx); // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's defs. - std::vector &DefList = Defs[MO.getReg()]; - - // If a def is going to wrap back around to the top of the loop, - // backschedule it. - if (!UnitLatencies && DefList.empty()) { - LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); - if (I != LoopRegs.Deps.end()) { - const MachineOperand *UseMO = I->second.first; - unsigned Count = I->second.second; - const MachineInstr *UseMI = UseMO->getParent(); - unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - const TargetSubtargetInfo &ST = - TM.getSubtarget(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - // TODO: If we knew the total depth of the region here, we could - // handle the case where the whole loop is inside the region but - // is large enough that the isScheduleHigh trick isn't needed. - if (UseMOIdx < UseMCID.getNumOperands()) { - // Currently, we only support scheduling regions consisting of - // single basic blocks. Check to see if the instruction is in - // the same region by checking to see if it has the same parent. - if (UseMI->getParent() != MI->getParent()) { - unsigned Latency = SU->Latency; - if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) - Latency += SpecialAddressLatency; - // This is a wild guess as to the portion of the latency which - // will be overlapped by work done outside the current - // scheduling region. - Latency -= std::min(Latency, Count); - // Add the artificial edge. 
- ExitSU.addPred(SDep(SU, SDep::Order, Latency, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - } else if (SpecialAddressLatency > 0 && - UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { - // The entire loop body is within the current scheduling region - // and the latency of this operation is assumed to be greater - // than the latency of the loop. - // TODO: Recursively mark data-edge predecessors as - // isScheduleHigh too. - SU->isScheduleHigh = true; - } - } - LoopRegs.Deps.erase(I); - } - } + std::vector &DefList = Defs[MO.getReg()]; // clear this register's use list if (Uses.contains(MO.getReg())) @@ -393,11 +330,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // the block. Instead, we leave only one call at the back of the // DefList. if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) + while (!DefList.empty() && DefList.back().SU->isCall) DefList.pop_back(); } // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(SU); + DefList.push_back(PhysRegSUOper(SU, OperIdx)); } } @@ -430,9 +367,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { else { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { - unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx, - DefSU->getInstr()); - DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg)); + SDep Dep(SU, SDep::Output, Reg); + unsigned OutLatency = + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); + Dep.setMinLatency(OutLatency); + Dep.setLatency(OutLatency); + DefSU->addPred(Dep); } DefI->SU = SU; } @@ -462,18 +402,17 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (DefSU) { // The reaching Def lives within this scheduling region. // Create a data dependence. - // - // TODO: Handle "special" address latencies cleanly. - SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg); - if (!UnitLatencies) { - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - unsigned Latency = computeOperandLatency(DefSU, SU, const_cast(dep)); - dep.setLatency(Latency); - - const TargetSubtargetInfo &ST = TM.getSubtarget(); - ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); - } + SDep dep(DefSU, SDep::Data, Reg); + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + int DefOp = Def->findRegisterDefOperandIdx(Reg); + dep.setLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); + dep.setMinLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + + const TargetSubtargetInfo &ST = TM.getSubtarget(); + ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); SU->addPred(dep); } } @@ -481,14 +420,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Add antidependence to the following def of the vreg it uses. VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); + DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). 
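/// Ordered (e.g. volatile or atomic) memory references count here unless the
/// instruction is a known-invariant load.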
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
-      (MI->hasVolatileMemoryRef() &&
+      (MI->hasOrderedMemoryRef() &&
       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
    return true;
  return false;
@@ -621,8 +560,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
  // and stop descending.
  if (*Depth > 200 ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
-    SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
-                      /*isNormalMemory=*/true));
+    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
    return *Depth;
  }
  // Track current depth.
@@ -653,9 +591,9 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
    if (SU == *I)
      continue;
    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
-      unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
-      (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
-                         /*isNormalMemory=*/true));
+      SDep Dep(SU, SDep::MayAliasMem);
+      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+      (*I)->addPred(Dep);
    }
    // Now go through all the chain successors and iterate from them.
    // Keep track of visited nodes.
@@ -678,9 +616,11 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
  // If this is a false dependency,
  // do not add the edge, but remember the rejected node.
  if (!EnableAASchedMI ||
-      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
-    SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
-                      isNormalMemory));
+      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+    Dep.setLatency(TrueMemOrderLatency);
+    SUb->addPred(Dep);
+  }
  else {
    // Duplicate entries should be ignored.
    RejectList.insert(SUb);
@@ -718,10 +658,7 @@ void ScheduleDAGInstrs::initSUnits() {
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
-    if (UnitLatencies)
-      SU->Latency = 1;
-    else
-      computeLatency(SU);
+    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
  }
}

@@ -825,16 +762,19 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
-        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+        I->second->addPred(SDep(SU, SDep::Barrier));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          SDep Dep(SU, SDep::Barrier);
+          Dep.setLatency(TrueMemOrderLatency);
+          I->second[i]->addPred(Dep);
+        }
      }
      // Add SU to the barrier chain.
      if (BarrierChain)
-        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
      BarrierChain = SU;
      // This is a barrier event that acts as a pivotal node in the DAG,
      // so it is safe to clear the list of exposed nodes.
@@ -922,7 +862,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
          // SU and barrier _could_ be reordered, they should not. In addition,
          // we have lost all RejectMemNodes below barrier.
if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain->addPred(SDep(SU, SDep::Barrier)); } else { // Treat all other stores conservatively. goto new_alias_chain; @@ -931,10 +871,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (!ExitSU.isPred(SU)) // Push store's up a bit to avoid them getting in between cmp // and branches. - ExitSU.addPred(SDep(SU, SDep::Order, 0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + ExitSU.addPred(SDep(SU, SDep::Artificial)); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { @@ -969,7 +906,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MayAlias && AliasChain) addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain->addPred(SDep(SU, SDep::Barrier)); } } } @@ -982,34 +919,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.clear(); } -void ScheduleDAGInstrs::computeLatency(SUnit *SU) { - // Compute the latency for the node. We only provide a default for missing - // itineraries. Empty itineraries still have latency properties. - if (!InstrItins) { - SU->Latency = 1; - - // Simplistic target-independent heuristic: assume that loads take - // extra time. - if (SU->getInstr()->mayLoad()) - SU->Latency += 2; - } else { - SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); - } -} - -unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin) const { - // For a data dependency with a known register... - if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return 1; - - return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), - Use->getInstr(), dep.getReg(), FindMin); -} - void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); +#endif } std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { @@ -1029,3 +942,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { std::string ScheduleDAGInstrs::getDAGName() const { return "dag." + BB->getFullName(); } + +namespace { +/// \brief Manage the stack used by a reverse depth-first search over the DAG. +class SchedDAGReverseDFS { + std::vector > DFSStack; +public: + bool isComplete() const { return DFSStack.empty(); } + + void follow(const SUnit *SU) { + DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); + } + void advance() { ++DFSStack.back().second; } + + void backtrack() { DFSStack.pop_back(); } + + const SUnit *getCurr() const { return DFSStack.back().first; } + + SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } + + SUnit::const_pred_iterator getPredEnd() const { + return getCurr()->Preds.end(); + } +}; +} // anonymous + +void ScheduleDAGILP::resize(unsigned NumSUnits) { + ILPValues.resize(NumSUnits); +} + +ILPValue ScheduleDAGILP::getILP(const SUnit *SU) { + return ILPValues[SU->NodeNum]; +} + +// A leaf node has an ILP of 1/1. +static ILPValue initILP(const SUnit *SU) { + unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1; + return ILPValue(Cnt, 1 + SU->getDepth()); +} + +/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first +/// search from this root. 
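+///
+/// Illustrative arithmetic (unit latencies assumed): a three-instruction
+/// chain a -> b -> c yields InstrCount/Cycles = 3/3 = 1.0, while a root with
+/// two independent leaf predecessors yields 3/2 = 1.5, reflecting the extra
+/// instruction-level parallelism.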
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+  if (!IsBottomUp)
+    llvm_unreachable("Top-down ILP metric is unimplemented");
+
+  SchedDAGReverseDFS DFS;
+  // Mark a node visited by validating it.
+  ILPValues[Root->NodeNum] = initILP(Root);
+  DFS.follow(Root);
+  for (;;) {
+    // Traverse the leftmost path as far as possible.
+    while (DFS.getPred() != DFS.getPredEnd()) {
+      const SUnit *PredSU = DFS.getPred()->getSUnit();
+      DFS.advance();
+      // If the pred is already valid, skip it.
+      if (ILPValues[PredSU->NodeNum].isValid())
+        continue;
+      ILPValues[PredSU->NodeNum] = initILP(PredSU);
+      DFS.follow(PredSU);
+    }
+    // Visit the top of the stack in postorder and backtrack.
+    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+    DFS.backtrack();
+    if (DFS.isComplete())
+      break;
+    // Add the recently finished predecessor's bottom-up descendant count.
+    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+  if (!isValid())
+    OS << "BADILP";
+  OS << InstrCount << " / " << Cycles << " = "
+     << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+  dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+  Val.print(OS);
+  return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 38feee9..6e781b1 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//

#include "llvm/Constants.h"
-#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -35,7 +34,7 @@ namespace llvm {
    DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}

    static std::string getGraphName(const ScheduleDAG *G) {
-      return G->MF.getFunction()->getName();
+      return G->MF.getName();
    }

    static bool renderGraphFromBottomUp() {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index e675366..2cd84d6 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -89,6 +89,7 @@ void ScoreboardHazardRecognizer::Reset() {
  ReservedScoreboard.reset();
}

+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScoreboardHazardRecognizer::Scoreboard::dump() const {
  dbgs() << "Scoreboard:\n";

@@ -104,6 +105,7 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const {
    dbgs() << '\n';
  }
}
+#endif

bool ScoreboardHazardRecognizer::atIssueLimit() const {
  if (IssueWidth == 0)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4e29879..37d7731 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -194,6 +194,7 @@ namespace {
    SDValue visitOR(SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
+    SDValue SimplifyVUnaryOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -269,6 +270,8 @@ namespace { SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); + SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -300,6 +303,11 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Merge consecutive store operations into a wide store. + /// This optimization uses wide integers or vectors when possible. + /// \return True if some memory operations were changed. + bool MergeConsecutiveStores(StoreSDNode *N); + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -385,10 +393,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth = 0) { - // No compile time optimizations on this type. - if (Op.getValueType() == MVT::ppcf128) - return 0; - // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return 2; @@ -413,7 +417,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) return 0; - // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) return V; @@ -1643,7 +1647,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { - SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); + SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), + VT); return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, N1.getOperand(0)); } @@ -2345,16 +2350,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // we don't want to undo this promotion. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. - if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) - && Level == AfterLegalizeTypes) { + if ((N0.getOpcode() == ISD::BITCAST || + N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && + Level == AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - // If both incoming values are integers, and the original types are the same. + DebugLoc DL = N->getDebugLoc(); + // If both incoming values are integers, and the original types are the + // same. if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); - SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); AddToWorkList(Op.getNode()); return BC; } @@ -2496,8 +2504,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // lanes of the constant together. 
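// Illustration with assumed values: a v4i16 all-0xAAAA mask may come back
// from the splat analysis as the 2-bit pattern 0b10; the expansion loop
// below doubles it (0b1010, 0xAA, ...) until it is the 16-bit lane value
// 0xAAAA again, so the per-lane fold sees full-width lanes.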
EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } @@ -2984,7 +3002,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); - else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), @@ -3202,11 +3220,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if ((LShVal + RShVal) != OpSizeInBits) return 0; - SDValue Rot; - if (HasROTL) - Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); - else - Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -3239,12 +3254,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast(RHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTL) - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); } } } @@ -3256,25 +3267,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast(LHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTR) - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? 
RHSShiftAmt : LHSShiftAmt).getNode(); } } } // Look for sign/zext/any-extended or truncate cases: - if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && - (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { SDValue LExtOp0 = LHSShiftAmt.getOperand(0); SDValue RExtOp0 = RHSShiftAmt.getOperand(0); if (RExtOp0.getOpcode() == ISD::SUB && @@ -4046,7 +4053,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) && + TLI.getBooleanContents(false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4412,20 +4420,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); - if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, - N0.getOperand(0), N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); - } + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -5235,13 +5241,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // if the source is smaller than the dest, we still need an extend return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); - else if (N0.getOperand(0).getValueType().bitsGT(VT)) + if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); - else - // if the source and dest are the same type, we can drop both the extend - // and the truncate. - return N0.getOperand(0); + // if the source and dest are the same type, we can drop both the extend + // and the truncate. + return N0.getOperand(0); } // Fold extract-and-trunc into a narrow extract. For example: @@ -5301,6 +5306,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (Reduced.getNode()) return Reduced; } + // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), + // where ... 
are all 'undef'. + if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { + SmallVector VTs; + SDValue V; + unsigned Idx = 0; + unsigned NumDefs = 0; + + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue X = N0.getOperand(i); + if (X.getOpcode() != ISD::UNDEF) { + V = X; + Idx = i; + NumDefs++; + } + // Stop if more than one members are non-undef. + if (NumDefs > 1) + break; + VTs.push_back(EVT::getVectorVT(*DAG.getContext(), + VT.getVectorElementType(), + X.getValueType().getVectorNumElements())); + } + + if (NumDefs == 0) + return DAG.getUNDEF(VT); + + if (NumDefs == 1) { + assert(V.getNode() && "The single defined operand is empty!"); + SmallVector Opnds; + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (i != Idx) { + Opnds.push_back(DAG.getUNDEF(VTs[i])); + continue; + } + SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); + AddToWorkList(NV.getNode()); + Opnds.push_back(NV); + } + return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + &Opnds[0], Opnds.size()); + } + } // Simplify the operands using demanded-bits information. if (!VT.isVector() && @@ -5338,7 +5385,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { !LD2->isVolatile() && DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); - unsigned NewAlign = TLI.getTargetData()-> + unsigned NewAlign = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && @@ -5407,7 +5454,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { !cast(N0)->isVolatile() && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); - unsigned Align = TLI.getTargetData()-> + unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); @@ -5430,7 +5477,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // This often reduces constant pool loads. 
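// For example, (bitcast (fneg X)) for f32 becomes an integer
// (xor (bitcast X), 0x80000000), and (bitcast (fabs X)) the matching
// (and (bitcast X), 0x7fffffff), trading an FP op for a cheap integer op.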
   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
-      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+      N0.getNode()->hasOneUse() && VT.isInteger() &&
+      !VT.isVector() && !N0.getValueType().isVector()) {
     SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
                                   N0.getOperand(0));
     AddToWorkList(NewConv.getNode());
@@ -5653,7 +5701,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   }

   // fold (fadd c1, c2) -> c1 + c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -5664,12 +5712,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     return N0;
   // fold (fadd A, (fneg B)) -> (fsub A, B)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+      isNegatibleForFree(N1, LegalOperations, TLI,
+                         &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
   // fold (fadd (fneg A), B) -> (fsub B, A)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+      isNegatibleForFree(N0, LegalOperations, TLI,
+                         &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                        GetNegatedExpression(N0, DAG, LegalOperations));

@@ -5681,6 +5729,139 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                    N0.getOperand(1), N1));

+  // If allowed, fold (fadd (fneg x), x) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // If allowed, fold (fadd x, (fneg x)) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // In unsafe math mode, we can fold chains of FADD's of the same value
+  // into multiplications. This transform is not safe in general because
+  // we are reducing the number of rounding steps.
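To make the rounding remark concrete before the folds that follow: each rewrite replaces a chain of additions with one multiply, so both sides agree in exact arithmetic but round a different number of times. A standalone sketch with arbitrary values (not part of the patch):

    #include <cstdio>

    int main() {
      double x = 0.1, c = 0.2;
      // (fadd (fmul c, x), x) -> (fmul c+1, x): two roundings become
      // one, so the printed doubles may differ in the last ulp.
      printf("%.17g\n%.17g\n", c * x + x, (c + 1.0) * x);
      // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x): doubling is
      // exact in binary floating point, so this pair always agrees.
      printf("%.17g\n%.17g\n", (x + x) + (x + x), 4.0 * x);
      return 0;
    }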
+ if (DAG.getTarget().Options.UnsafeFPMath && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); + + // (fadd (fmul c, x), x) -> (fmul c+1, x) + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul x, c), x) -> (fmul c+1, x) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && + N0.getOperand(0) == N1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); + + // (fadd x, (fmul c, x)) -> (fmul c+1, x) + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fmul x, c)) -> (fmul c+1, x) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return 
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getConstantFP(4.0, VT)); + } + } + // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && @@ -5692,8 +5873,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1); } - - // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, @@ -5719,7 +5900,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub c1, c2) -> c1-c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && @@ -5811,7 +5992,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // fold (fmul c1, c2) -> c1*c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) @@ -5867,7 +6048,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N0CFP && N0CFP->isZero()) + return N2; + if (N1CFP && N1CFP->isZero()) + return N2; + } if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -5877,6 +6065,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FMUL && + N0 == N2.getOperand(0) && + N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + } + + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FMUL && N1CFP && + N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + N2); + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorkList(RHSNeg.getNode()); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + // (fma x, c, x) -> (fmul x, (c+1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, VT))); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if 
(DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, VT))); + } + + return SDValue(); } @@ -5895,11 +6135,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // fold (fdiv c1, c2) -> c1/c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { // Compute the reciprocal 1.0 / c2. APFloat N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 @@ -5942,7 +6182,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); return SDValue(); @@ -5955,7 +6195,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); if (N1CFP) { @@ -6005,7 +6245,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128 && + if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -6062,7 +6302,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128 && + if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -6117,7 +6357,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6130,7 +6370,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp - if (N0CFP && N0.getValueType() != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x @@ -6184,7 +6424,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the @@ -6225,6 +6465,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVUnaryOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); @@ -6246,6 +6491,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } } + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP1 = dyn_cast(N0.getOperand(1)); + if (CFP1) { + 
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + N0.getOperand(1))); + } + } + return SDValue(); } @@ -6255,7 +6511,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6267,7 +6523,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6279,7 +6535,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6290,8 +6546,13 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVUnaryOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (fabs c1) -> fabs(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) @@ -6511,7 +6772,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - TargetLowering::AddrMode AM; + AddrMode AM; if (N->getOpcode() == ISD::ADD) { ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (Offset) @@ -7138,7 +7399,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) + if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -7200,7 +7461,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); + unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -7225,6 +7486,433 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +/// Returns the base pointer and an integer offset from that object. +static std::pair GetPointerBaseAndOffset(SDValue Ptr) { + if (Ptr->getOpcode() == ISD::ADD && isa(Ptr->getOperand(1))) { + int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); + SDValue Base = Ptr->getOperand(0); + return std::make_pair(Base, Offset); + } + + return std::make_pair(Ptr, 0); +} + +/// Holds a pointer to an LSBaseSDNode as well as information on where it +/// is located in a sequence of memory operations connected by a chain. +struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. 
+ unsigned SequenceNum; +}; + +/// Sorts store nodes in a link according to their offset from a shared +// base ptr. +struct ConsecutiveMemoryChainSorter { + bool operator()(MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + } +}; + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; + + // Don't merge vectors into wider inputs. + if (MemVT.isVector() || !MemVT.isSimple()) + return false; + + // Perform an early exit check. Do not bother looking at stored values that + // are not constants or loads. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa(StoredVal); + if (!isa(StoredVal) && !isa(StoredVal) && + !IsLoadSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 1); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // This holds the base pointer and the offset in bytes from the base pointer. + std::pair BasePtr = + GetPointerBaseAndOffset(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.first.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.first.getOpcode() == ISD::UNDEF) + return false; + + SmallVector StoreNodes; + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + unsigned Seq = 0; + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 1)->hasOneUse()) + break; + + // Find the base pointer and offset for this memory node. + std::pair Ptr = + GetPointerBaseAndOffset(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (Ptr.first.getNode() != BasePtr.first.getNode()) + break; + + // Check that the alignment is the same. + if (Index->getAlignment() != St->getAlignment()) + break; + + // The memory operands must not be volatile. + if (Index->isVolatile() || Index->isIndexed()) + break; + + // No truncation. + if (StoreSDNode *St = dyn_cast(Index)) + if (St->isTruncatingStore()) + break; + + // The stored memory type must be the same. + if (Index->getMemoryVT() != MemVT) + break; + + // We do not allow unaligned stores because we want to prevent overriding + // stores. + if (Index->getAlignment()*8 != MemVT.getSizeInBits()) + break; + + // We found a potential memory operand to merge. + StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); + + // Move up the chain to the next memory operation. + Index = dyn_cast(Index->getChain().getNode()); + } + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the base pointer. + std::sort(StoreNodes.begin(), StoreNodes.end(), + ConsecutiveMemoryChainSorter()); + + // Scan the memory operations on the chain and find the first non-consecutive + // store memory address. + unsigned LastConsecutiveStore = 0; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + for (unsigned i=1; i(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + + if (ConstantSDNode *C = dyn_cast(StoredVal)) { + NonZero |= !C->isNullValue(); + } else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) { + NonZero |= !C->getConstantFPValue()->isNullValue(); + } else { + // Non constant. 
+ break; + } + + // Find a legal type for the constant store. + unsigned StoreBW = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + if (TLI.isTypeLegal(StoreTy)) + LastLegalType = i+1; + + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty)) + LastLegalVectorType = i + 1; + } + + // We only use vectors if the constant is known to be zero. + if (NonZero) + LastLegalVectorType = 0; + + // Check if we found a legal integer type to store. + if (LastLegalType == 0 && LastLegalVectorType == 0) + return false; + + bool UseVector = LastLegalVectorType > LastLegalType; + unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; + + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + unsigned EarliestNodeUsed = 0; + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // The earliest Node in the DAG. + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + StoredVal = DAG.getConstant(0, Ty); + } else { + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ?(NumElem - 1 - i) : i; + StoreSDNode *St = cast(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt<<=ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast(Val)) { + StoreInt|=C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast(Val)) { + StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + assert(false && "Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, StoreTy); + } + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the first store with the new store + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. 
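As an aside before the cleanup loop that follows: the shift-and-or loop a few lines up packs the small constants into one wide integer so each value lands at the right byte address after the merged store. A standalone sketch of the same byte-order logic for four single-byte stores (the values are made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Byte stores of 0x11,0x22,0x33,0x44 at offsets 0..3 become one
      // 32-bit store. On a little-endian target the byte at offset 0
      // must end up in the low-order bits, so pack from the highest
      // offset down, mirroring the Idx computation in the patch.
      uint8_t Vals[4] = {0x11, 0x22, 0x33, 0x44};
      uint32_t StoreInt = 0;
      bool IsLE = true;
      for (int i = 0; i < 4; ++i) {
        int Idx = IsLE ? (4 - 1 - i) : i;
        StoreInt <<= 8;
        StoreInt |= Vals[Idx];
      }
      printf("0x%08x\n", StoreInt); // 0x44332211 on a little-endian target
      return 0;
    }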
+ while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + removeFromWorkList(St); + DAG.DeleteNode(St); + } + + return true; + } + + // Below we handle the case of multiple consecutive stores that + // come from multiple consecutive loads. We merge them into a single + // wide load and a single wide store. + + // Look for load nodes which are used by the stored values. + SmallVector LoadNodes; + + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + SDValue LdBasePtr; + for (unsigned i=0; i(StoreNodes[i].MemNode); + LoadSDNode *Ld = dyn_cast(St->getValue()); + if (!Ld) break; + + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + break; + + // Check that the alignment is the same as the stores. + if (Ld->getAlignment() != St->getAlignment()) + break; + + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) + break; + + // We do not accept ext loads. + if (Ld->getExtensionType() != ISD::NON_EXTLOAD) + break; + + // The stored memory type must be the same. + if (Ld->getMemoryVT() != MemVT) + break; + + std::pair LdPtr = + GetPointerBaseAndOffset(Ld->getBasePtr()); + + // If this is not the first ptr that we check. + if (LdBasePtr.getNode()) { + // The base ptr must be the same. + if (LdPtr.first != LdBasePtr) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr.first; + } + + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); + } + + if (LoadNodes.size() < 2) + return false; + + // Scan the memory operations on the chain and find the first non-consecutive + // load memory address. These variables hold the index in the store node + // array. + unsigned LastConsecutiveLoad = 0; + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 0; + unsigned LastLegalIntegerType = 0; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = LoadNodes[0].MemNode->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads much share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; + + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + + // Find a legal type for the vector store. + EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(StoreTy)) + LastLegalVectorType = i + 1; + + // Find a legal type for the integer store. + unsigned StoreBW = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + if (TLI.isTypeLegal(StoreTy)) + LastLegalIntegerType = i + 1; + } + + // Only use vector types if the vector type is larger than the integer type. + // If they are the same, use integers. + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; + unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) + return false; + + // The earliest Node in the DAG. 
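Another aside: both the store scan and the load scan in this function accept only a prefix of the sorted operations, stopping as soon as an offset is not exactly i * ElementSizeBytes past the first one. A standalone model of that consecutiveness check (numbers are illustrative):

    #include <cstdio>

    int main() {
      // Offsets, from one shared base pointer, of four 4-byte accesses.
      long Offsets[] = {16, 20, 24, 36};
      long ElementSizeBytes = 4, Start = Offsets[0];
      unsigned LastConsecutive = 0;
      for (unsigned i = 1; i < 4; ++i) {
        if (Offsets[i] - Start != ElementSizeBytes * (long)i)
          break; // 36 is a gap, so the mergeable run ends here
        LastConsecutive = i;
      }
      printf("mergeable prefix ends at index %u\n", LastConsecutive); // 2
      return 0;
    }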
+ unsigned EarliestNodeUsed = 0; + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + for (unsigned i=1; i StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + } else { + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + } + + DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); + DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); + + LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, + FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), + false, false, false, + FirstLoad->getAlignment()); + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), false, false, + FirstInChain->getAlignment()); + + // Replace one of the loads with the new load. + LoadSDNode *Ld = cast(LoadNodes[0].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + + // Remove the rest of the load chains. + for (unsigned i = 1; i < NumElem ; ++i) { + // Replace all chain users of the old load nodes with the chain of the new + // load node. + LoadSDNode *Ld = cast(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + } + + // Replace the first store with the new store. + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + // Remove all Store nodes. + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + removeFromWorkList(St); + DAG.DeleteNode(St); + } + + return true; +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); @@ -7237,7 +7925,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = TLI.getTargetData()-> + unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || @@ -7426,6 +8114,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->getAlignment()); } + // Only perform this optimization before the types are legal, because we + // don't want to perform this optimization on every DAGCombine invocation. + if (!LegalTypes && MergeConsecutiveStores(ST)) + return SDValue(N, 0); + return ReduceLoadOpStoreWidth(N); } @@ -7504,9 +8197,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because - // we may introduce new vector instructions which are not backed by TD patterns. - // For example on AVX, extracting elements from a wide vector without using - // extract_subvector. + // we may introduce new vector instructions which are not backed by TD + // patterns. For example on AVX, extracting elements from a wide vector + // without using extract_subvector. 
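The shuffle transform described in the comment above rests on a simple indexing rule: mask entries below N select a lane of the first operand of an N-lane shuffle, and entries of N or more select lane M - N of the second. A standalone model with made-up lane values (not part of the patch):

    #include <cstdio>

    int main() {
      // Extracting lane 2 from (vector_shuffle A, B, <4,1,6,3>) is the
      // same as extracting lane 6-4 = 2 from B.
      int A[4] = {10, 11, 12, 13};
      int B[4] = {20, 21, 22, 23};
      int Mask[4] = {4, 1, 6, 3};
      int Elt = 2;
      int M = Mask[Elt];
      int Val = (M < 4) ? A[M] : B[M - 4];
      printf("%d\n", Val); // prints 22
      return 0;
    }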
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE && ConstEltNo && !LegalOperations) { int Elt = cast(EltNo)->getZExtValue(); @@ -7625,7 +8318,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check the resultant load doesn't need a higher alignment than the // original load. unsigned NewAlign = - TLI.getTargetData() + TLI.getDataLayout() ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) @@ -7690,15 +8383,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { +// Simplify (build_vec (ext )) to (bitcast (build_vec )) +SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) + return SDValue(); + unsigned NumInScalars = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - // A vector built entirely of undefs is undef. - if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(VT); - // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR @@ -7741,64 +8440,141 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. - EVT OutScalarTy = N->getValueType(0).getScalarType(); + EVT OutScalarTy = VT.getScalarType(); bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); - // We perform this optimization post type-legalization because - // the type-legalizer often scalarizes integer-promoted vectors. - // Performing this optimization before may create bit-casts which - // will be type-legalized to complex code sequences. - // We perform this optimization only before the operation legalizer because we - // may introduce illegal operations. // Create a new simpler BUILD_VECTOR sequence which other optimizations can // turn into a single shuffle instruction. - if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && - ValidTypes) { - bool isLE = TLI.isLittleEndian(); - unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); - assert(ElemRatio > 1 && "Invalid element size ratio"); - SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); - - unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); - SmallVector Ops(NewBVElems, Filler); - - // Populate the new build_vector - for (unsigned i=0; i < N->getNumOperands(); ++i) { - SDValue Cast = N->getOperand(i); - assert((Cast.getOpcode() == ISD::ANY_EXTEND || - Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); - SDValue In; - if (Cast.getOpcode() == ISD::UNDEF) - In = DAG.getUNDEF(SourceType); - else - In = Cast->getOperand(0); - unsigned Index = isLE ? 
(i * ElemRatio) : - (i * ElemRatio + (ElemRatio - 1)); + if (!ValidTypes) + return SDValue(); + + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); + SmallVector Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == VT.getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, VT, BV); +} + +SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + + EVT SrcVT = MVT::Other; + unsigned Opcode = ISD::DELETED_NODE; + unsigned NumDefs = 0; - assert(Index < Ops.size() && "Invalid index"); - Ops[Index] = In; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + unsigned Opc = In.getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + // If all scalar values are floats and converted from integers. + if (Opcode == ISD::DELETED_NODE && + (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { + Opcode = Opc; + // If not supported by target, bail out. + if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal && + TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom) + return SDValue(); } + if (Opc != Opcode) + return SDValue(); - // The type of the new BUILD_VECTOR node. - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); - assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && - "Invalid vector size"); - // Check if the new vector type is legal. - if (!isTypeLegal(VecVT)) return SDValue(); + EVT InVT = In.getOperand(0).getValueType(); - // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VecVT, &Ops[0], Ops.size()); + // If all scalar values are typed differently, bail out. It's chosen to + // simplify BUILD_VECTOR of integer types. + if (SrcVT == MVT::Other) + SrcVT = InVT; + if (SrcVT != InVT) + return SDValue(); + NumDefs++; + } + + // If the vector has just one element defined, it's not worth to fold it into + // a vectorized one. + if (NumDefs < 2) + return SDValue(); - // The new BUILD_VECTOR node has the potential to be further optimized. - AddToWorkList(BV.getNode()); - // Bitcast to the desired type. 
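As an aside on the index arithmetic above: ElemRatio decides which slot of the widened build_vector each source value occupies, with the filler constants taking the remaining slots. A standalone sketch for four i16 values extended to i32, so ElemRatio is 2 (the numbers are illustrative):

    #include <cstdio>

    int main() {
      unsigned ElemRatio = 2, NumInScalars = 4;
      bool isLE = true; // little-endian placement
      for (unsigned i = 0; i != NumInScalars; ++i) {
        // On LE each source value lands at i*2; on BE it lands at the
        // high end of its group, i*2 + (ElemRatio - 1).
        unsigned Index = isLE ? (i * ElemRatio)
                              : (i * ElemRatio + (ElemRatio - 1));
        printf("source %u -> slot %u\n", i, Index);
      }
      return 0;
    }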
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) + && "Should only handle conversion from integer to float."); + assert(SrcVT != MVT::Other && "Cannot determine source type!"); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); + SmallVector Opnds; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + + if (In.getOpcode() == ISD::UNDEF) + Opnds.push_back(DAG.getUNDEF(SrcVT)); + else + Opnds.push_back(In.getOperand(0)); } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + &Opnds[0], Opnds.size()); + AddToWorkList(BV.getNode()); + + return DAG.getNode(Opcode, dl, VT, BV); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + SDValue V = reduceBuildVecExtToExtBuildVec(N); + if (V.getNode()) + return V; + + V = reduceBuildVecConvertToConvertBuildVec(N); + if (V.getNode()) + return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -7876,15 +8652,22 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); + // If the input vector type has a different base type to the output + // vector type, bail out. + if (VecIn1.getValueType().getVectorElementType() != + VT.getVectorElementType()) + return SDValue(); + // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); - // Check that we were able to transform all incoming values to the same type. + // Check that we were able to transform all incoming values to the same + // type. if (VecIn2.getValueType() != VecIn1.getValueType() || VecIn1.getValueType() != VT) return SDValue(); @@ -7897,7 +8680,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue Ops[2]; Ops[0] = VecIn1; Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); } return SDValue(); @@ -7933,8 +8716,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); // Only handle cases where both indexes are constants with the same type. - ConstantSDNode *InsIdx = dyn_cast(N->getOperand(1)); - ConstantSDNode *ExtIdx = dyn_cast(V->getOperand(2)); + ConstantSDNode *ExtIdx = dyn_cast(N->getOperand(1)); + ConstantSDNode *InsIdx = dyn_cast(V->getOperand(2)); if (InsIdx && ExtIdx && InsIdx->getValueType(0).getSizeInBits() <= 64 && @@ -7951,6 +8734,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same type. 
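To make the extract_subvector-of-concat combine concrete before its guard code: with equally sized pieces, the start index must be a multiple of the piece width, and the result is simply operand Idx / NumElems of the concat. A standalone model (names are illustrative):

    #include <cstdio>

    int main() {
      // (extract_subvec (concat V0, V1, V2), 8) with 4-element pieces
      // picks operand 8/4 = 2, i.e. V2.
      unsigned Idx = 8, NumElems = 4;
      if (Idx % NumElems == 0)
        printf("folds to operand V%u\n", Idx / NumElems); // V2
      return 0;
    }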
+    if (V->getOperand(0).getValueType() != NVT)
+      return SDValue();
+    unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    unsigned NumElems = NVT.getVectorNumElements();
+    assert((Idx % NumElems) == 0 &&
+           "IDX in concat is not a multiple of the result vector length.");
+    return V->getOperand(Idx / NumElems);
+  }
+
   return SDValue();
 }

@@ -8266,6 +9064,44 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   return SDValue();
 }

+/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (LegalOperations) return SDValue();
+
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVUnaryOp only works on vectors!");
+
+  SDValue N0 = N->getOperand(0);
+
+  if (N0.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+    SDValue Op = N0.getOperand(i);
+    if (Op.getOpcode() != ISD::UNDEF &&
+        Op.getOpcode() != ISD::ConstantFP)
+      break;
+    EVT EltVT = Op.getValueType();
+    SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
+    if (FoldOp.getOpcode() != ISD::UNDEF &&
+        FoldOp.getOpcode() != ISD::ConstantFP)
+      break;
+    Ops.push_back(FoldOp);
+    AddToWorkList(FoldOp.getNode());
+  }
+
+  if (Ops.size() != N0.getNumOperands())
+    return SDValue();
+
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                     N0.getValueType(), &Ops[0], Ops.size());
+}
+
 SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
                                     SDValue N1, SDValue N2){
   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

@@ -8349,6 +9185,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
         return false;
+      // The loads must not depend on one another.
+      if (LLD->isPredecessorOf(RLD) ||
+          RLD->isPredecessorOf(LLD))
+        return false;
       Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
                          LLD->getBasePtr().getValueType(),
                          TheSelect->getOperand(0), LLD->getBasePtr(),
@@ -8468,7 +9308,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         const_cast<ConstantFP*>(TV->getConstantFPValue()) };
       Type *FPTy = Elts[0]->getType();

-      const TargetData &TD = *TLI.getTargetData();
+      const DataLayout &TD = *TLI.getDataLayout();

       // Create a ConstantArray of the two constants.
       Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
@@ -8583,34 +9423,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
     return SDValue();

   // Get a SetCC of the condition
-  // FIXME: Should probably make sure that setcc is legal if we ever have a
-  // target where it isn't.
-  SDValue Temp, SCC;
-  // cast from setcc result type to select result type
-  if (LegalTypes) {
-    SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
-                       N0, N1, CC);
-    if (N2.getValueType().bitsLT(SCC.getValueType()))
-      Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
-    else
+  // NOTE: Don't create a SETCC if it's not legal on this target.
+  if (!LegalOperations ||
+      TLI.isOperationLegal(ISD::SETCC,
+                           LegalTypes ?
TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), N2.getValueType(), SCC); - } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), - N2.getValueType(), SCC); - } + } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); - if (N2C->getAPIntValue() == 1) - return Temp; + if (N2C->getAPIntValue() == 1) + return Temp; - // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); + } } // Check to see if this is the equivalent of setcc @@ -8729,7 +9573,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { // to alias with anything but itself. Provides base object and offset as // results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - const GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -8754,8 +9598,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, // for ConstantSDNodes since the same constant pool entry may be represented // by multiple nodes with different offsets. if (ConstantPoolSDNode *C = dyn_cast(Base)) { - CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() - : (void *)C->getConstVal(); + CV = C->isMachineConstantPoolEntry() ? 
(const void *)C->getMachineCPVal() + : (const void *)C->getConstVal(); Offset += C->getOffset(); return false; } @@ -8780,7 +9624,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; - void *CV1, *CV2; + const void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 683fac6..4854cf7 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -53,7 +53,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" @@ -1059,7 +1059,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getTargetData()), + TD(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 3e18ea7..a418290 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -80,9 +80,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { if (const AllocaInst *AI = dyn_cast(I)) if (const ConstantInt *CUI = dyn_cast(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. @@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { cast(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP); + MayNeedSP, AI); } for (; BB != EB; ++BB) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4488d27..a8381b2 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { /// /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding /// the chain and glue. 
These operands may be implicit on the machine instr. -static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; @@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { --N; // Ignore chain if it exists. // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. - for (unsigned I = N; I; --I) { + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { if (isa(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast(Node->getOperand(I - 1))) @@ -312,8 +314,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); - assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && - "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), @@ -390,10 +390,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = TM->getTargetData()->getPrefTypeAlignment(Type); + Align = TM->getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = TM->getTargetData()->getTypeAllocSize(Type); + Align = TM->getDataLayout()->getTypeAllocSize(Type); } } @@ -410,6 +410,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, ES->getTargetFlags())); } else if (BlockAddressSDNode *BA = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), + BA->getOffset(), BA->getTargetFlags())); } else if (TargetIndexSDNode *TI = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), @@ -720,7 +721,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumImpUses = 0; - unsigned NodeOperands = countOperands(Node, NumImpUses); + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -870,6 +872,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) @@ -884,25 +897,30 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); - // Add the HasSideEffect and isAlignStack bits. + // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore + // bits. 
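Before the extra-info word is emitted below, a note on the tied-operand bookkeeping this hunk goes on to add: GroupIdx records the MI operand index of each group's flag word, and the register operands of a group start right after it, so tying def and use registers is pure index arithmetic. A standalone model with hypothetical indices (not part of the patch):

    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical MI operand indices of three inline-asm flag words.
      std::vector<unsigned> GroupIdx = {2, 5, 9};
      unsigned DefGroup = 0, NumVals = 2;
      unsigned DefIdx = GroupIdx[DefGroup] + 1; // first def register
      unsigned UseIdx = GroupIdx.back() + 1;    // first use register
      for (unsigned j = 0; j != NumVals; ++j)
        printf("tie use operand %u to def operand %u\n",
               UseIdx + j, DefIdx + j);
      return 0;
    }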
int64_t ExtraInfo = cast(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + // Remember to operand index of the group flags. + SmallVector GroupIdx; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + GroupIdx.push_back(MI->getNumOperands()); MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast @@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), @@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. - for (; NumVals; --NumVals, ++i) + for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits. + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MI->tieOperands(DefIdx + j, UseIdx + j); + } + } break; } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 908ebb9..abf40b7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -718,7 +718,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast(Node), DAG, TLI, this); @@ -824,7 +824,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. 
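The expansion decision this comment describes is a single comparison: the access is rewritten only when its stated alignment is below the ABI alignment DataLayout reports for the memory type. A trivial standalone sketch with illustrative numbers:

    #include <cstdio>

    int main() {
      // E.g. a 4-byte integer access known only to be 2-byte aligned.
      unsigned AccessAlign = 2, ABIAlignment = 4;
      if (AccessAlign < ABIAlignment)
        printf("expand the unaligned access\n");
      else
        printf("leave the access as-is\n");
      return 0;
    }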
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast(Node), DAG, TLI, this); } @@ -869,25 +869,24 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, RVal, RChain); - } - } - break; + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); + } + } + break; case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(RVal, DAG); - if (Res.getNode()) { - RVal = Res; - RChain = Res.getValue(1); - } - break; + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; } case TargetLowering::Promote: { // Only promote a load of vector type to another. @@ -1060,7 +1059,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); + TLI.getDataLayout()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); @@ -1241,6 +1240,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::DEBUGTRAP: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Expand) { + // replace ISD::DEBUGTRAP with ISD::TRAP + SDValue NewVal; + NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(), + Node->getOperand(0)); + ReplaceNode(Node, NewVal.getNode()); + LegalizeOp(NewVal.getNode()); + return; + } + break; + default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1588,26 +1600,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, break; case TargetLowering::Expand: { ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; + ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = 
ISD::AND; break; - case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - // FIXME: Implement more expansions. - } - - SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + case ISD::SETO: + assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) + == TargetLowering::Legal + && "If SETO is expanded, SETOEQ must be legal!"); + CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; + case ISD::SETUO: + assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) + == TargetLowering::Legal + && "If SETUO is expanded, SETUNE must be legal!"); + CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + // If we are floating point, assign and break, otherwise fall through. + if (!OpVT.isInteger()) { + // We can use the 4th bit to tell if we are the unordered + // or ordered version of the opcode. + CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; + Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND; + CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); + break; + } + // Fallthrough if we are unsigned integer. + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETLT: + case ISD::SETNE: + case ISD::SETEQ: + InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { + // We only support using the inverted operation and not a + // different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); + } + LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); + RHS = SDValue(); + CC = SDValue(); + return; + } + + SDValue SetCC1, SetCC2; + if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { + // If we aren't the ordered or unorder operation, + // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + } else { + // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + } LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -1626,7 +1683,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DebugLoc dl) { // Create the stack frame object. unsigned SrcAlign = - TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). + TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). 
getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); @@ -1638,7 +1695,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); + unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -2042,7 +2099,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, DebugLoc dl) { - if (Op0.getValueType() == MVT::i32) { + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion // Get the stack frame index of a 8 byte buffer. @@ -2787,7 +2844,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, - DAG.getConstant(TLI.getTargetData()-> + DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer @@ -3109,6 +3166,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && + // If div is legal, it's better to do the normal expansion + !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && useDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { @@ -3366,7 +3425,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT PTy = TLI.getPointerTy(); - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e393896..92dc5a9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) - == TargetLowering::Custom) - Res = TLI.LowerOperation(SDValue(N, 0), DAG); - - if (Res.getNode() == 0) { - switch (N->getOpcode()) { - default: - #ifndef NDEBUG - dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); dbgs() << "\n"; - #endif - llvm_unreachable("Do not know how to expand this operator's operand!"); - - case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; - case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; - case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; - - case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; - case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; - case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; - case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; - case ISD::STORE: Res = 
ExpandFloatOp_STORE(cast(N), - OpNo); break; - } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast(N), + OpNo); break; } // If the result is null, the sub-method took care of registering results etc. diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e8e968a..a370fae 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -644,8 +644,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { EVT SmallVT = LHS.getValueType(); // To determine if the result overflowed in a larger type, we extend the - // input to the larger type, do the multiply, then check the high bits of - // the result to see if the overflow happened. + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. if (N->getOpcode() == ISD::SMULO) { LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); @@ -653,24 +654,31 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { LHS = ZExtPromotedInteger(LHS); RHS = ZExtPromotedInteger(RHS); } - SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1)); + SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS); - // Overflow occurred iff the high part of the result does not - // zero/sign-extend the low part. + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { - // Unsigned overflow occurred iff the high part is non-zero. + // Unsigned overflow occurred if the high part is non-zero. SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getIntPtrConstant(SmallVT.getSizeInBits())); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); } else { - // Signed overflow occurred iff the high part does not sign extend the low. + // Signed overflow occurred if the high part does not sign extend the low. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), Mul, DAG.getValueType(SmallVT)); Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); } + // The only other way for overflow to occur is if the multiplication in the + // larger type itself overflowed. 
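// [Editor's aside, expanding the comment above; not part of the patch.]
// Standalone model of the promoted-multiply overflow check, for an i16
// smulo carried out in i32:
#include <cstdint>
bool smulo16(int16_t a, int16_t b, int16_t &res) {
  int32_t wide = int32_t(a) * int32_t(b); // multiply in the promoted type
  res = int16_t(wide);                    // truncate back to the small type
  // Overflow iff the high bits are not a sign-extension of the low part.
  bool ovf = wide != int32_t(res);
  // For 16 -> 32 the wide multiply itself cannot overflow, but promotion
  // does not always double the width, which is why the code above also
  // ORs in the wide operation's own overflow bit.
  return ovf;
}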
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow, + SDValue(Mul.getNode(), 1)); + // Use the calculated overflow everywhere. ReplaceValueWith(SDValue(N, 1), Overflow); return Mul; @@ -2253,32 +2261,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); - EVT PtrVT = TLI.getPointerTy(); - Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); DebugLoc dl = N->getDebugLoc(); // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); - SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); SplitInteger(MUL, Lo, Hi); // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETNE); + RHS, DAG.getConstant(0, VT), ISD::SETEQ); SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, DAG.getConstant(1, VT), RHS); - SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); - SDValue Overflow; - Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); + SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, + ISD::SETNE); + Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } + Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 39337ff..644e36e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -15,7 +15,7 @@ #include "LegalizeTypes.h" #include "llvm/CallingConv.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 94fc976..20b7ce6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -625,6 +625,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); + SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); @@ -633,7 +634,7 @@ private: SDValue WidenVecRes_InregOp(SDNode *N); // Widen Vector Operand. 
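// [Editor's aside on the ExpandIntRes_XMULO hunk above; not part of the
// patch.] The UMULO expansion validates the product with a division,
// substituting 1 for a zero divisor and forcing "no overflow" in that
// case, since 0 * x never overflows:
#include <cstdint>
bool umulo64(uint64_t lhs, uint64_t rhs, uint64_t &prod) {
  prod = lhs * rhs;                        // wrapping multiply
  uint64_t notZero = (rhs == 0) ? 1 : rhs; // never divide by zero
  bool ovf = (prod / notZero) != lhs;      // round-trips exactly iff no wrap
  return (rhs == 0) ? false : ovf;
}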
- bool WidenVectorOperand(SDNode *N, unsigned ResNo); + bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 06f6bd6..6bcb3b2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -20,7 +20,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -94,14 +94,48 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. - EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); + unsigned NumElems = 2; + EVT ElemVT = NOutVT; + EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + + // If is not a legal type, try . + while (!isTypeLegal(NVT)) { + unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2; + // If the element size is smaller than byte, bail. + if (NewSizeInBits < 8) + break; + NumElems *= 2; + ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); + NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); - Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(1)); + + SmallVector Vals; + for (unsigned i = 0; i < NumElems; ++i) + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getIntPtrConstant(i))); + + // Build Lo, Hi pair by pairing extracted elements if needed. + unsigned Slot = 0; + for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { + // Each iteration will BUILD_PAIR two nodes and append the result until + // there are only two nodes left, i.e. Lo and Hi. + SDValue LHS = Vals[Slot]; + SDValue RHS = Vals[Slot + 1]; + + if (TLI.isBigEndian()) + std::swap(LHS, RHS); + + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + EVT::getIntegerVT( + *DAG.getContext(), + LHS.getValueType().getSizeInBits() << 1), + LHS, RHS)); + } + Lo = Vals[Slot++]; + Hi = Vals[Slot++]; if (TLI.isBigEndian()) std::swap(Lo, Hi); @@ -116,7 +150,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT. + TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast(StackPtr.getNode())->getIndex(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 704f99b..22f8d51 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,7 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. 
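// [Editor's aside, expanding the vselect comment above; not part of the
// patch.] With a per-lane mask that is all-ones or all-zero, a blend is
// pure bitwise logic, which is all the expansion needs:
#include <cstdint>
uint32_t blendLane(uint32_t mask, uint32_t a, uint32_t b) {
  return (a & mask) | (b & ~mask); // mask 0xFFFFFFFF picks a, 0 picks b
}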
SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); @@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; @@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::SELECT) + Result = ExpandSELECT(Op); else if (Node->getOpcode() == ISD::UINT_TO_FP) Result = ExpandUINT_TO_FLOAT(Op); else if (Node->getOpcode() == ISD::FNEG) @@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { + // Lower a select instruction where the condition is a scalar and the + // operands are vectors. Lower this select to VSELECT and implement it + // using XOR AND OR. The selector bit is broadcasted. + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + assert(VT.isVector() && !Mask.getValueType().isVector() + && Op1.getValueType() == Op2.getValueType() && "Invalid type"); + + unsigned NumElem = VT.getVectorNumElements(); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // Also, we need to be able to construct a splat vector using BUILD_VECTOR. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + // Generate a mask operand. + EVT MaskTy = TLI.getSetCCResultType(VT); + assert(MaskTy.isVector() && "Invalid CC type"); + assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() + && "Invalid mask size"); + + // What is the size of each element in the vector mask. + EVT BitTy = MaskTy.getScalarType(); + + Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), + DAG.getConstant(0, BitTy)); + + // Broadcast the mask so that the entire vector is all-one or all zero. + SmallVector Ops(NumElem, Mask); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. + Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. 
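// [Editor's aside on ExpandSELECT above; not part of the patch.] The
// scalar i1 condition is first stretched to an all-ones/all-zero element
// (the SELECT against all-ones and 0 in the code above), then splatted
// across the vector so the same bitwise blend applies per lane:
#include <cstdint>
uint32_t stretchBool(bool cond) {
  return 0u - uint32_t(cond); // false -> 0x00000000, true -> 0xFFFFFFFF
}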
@@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. + // This operation also isn't safe with AND, OR, XOR when the boolean + // type is 0/1 as we need an all ones vector constant to mask with. + // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(true) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() + assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() && "Invalid mask size"); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4709202..d51a6eb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -749,7 +749,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecType); + TLI.getDataLayout()->getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); @@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SetWidenedVector(SDValue(N, ResNo), Res); } +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. 
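// [Editor's aside on WidenVecRes_Ternary above; not part of the patch.]
// Three-operand ops such as FMA widen exactly like binary ones: widen
// every operand, rebuild the node, and ignore the extra lanes. Scalar
// analogue of what each widened lane computes:
#include <cmath>
double fmaLane(double a, double b, double c) {
  return std::fma(a, b, c); // fused multiply-add, one rounding step
}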
unsigned Opcode = N->getOpcode(); @@ -2069,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// // Widen Vector Operand //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom widen this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + switch (N->getOpcode()) { default: #ifndef NDEBUG - dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index f88b26d..d2269f8 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -28,8 +28,8 @@ class SDNode; class SDNodeOrdering { DenseMap OrderMap; - void operator=(const SDNodeOrdering&); // Do not implement. - SDNodeOrdering(const SDNodeOrdering&); // Do not implement. + void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; + SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; public: SDNodeOrdering() {} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b7ce48a..2ecdd89 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,11 +13,12 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/SmallSet.h" @@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies"); static RegisterScheduler fastDAGScheduler("fast", "Fast suboptimal list scheduling", createFastDAGScheduler); +static RegisterScheduler + linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", + createDAGLinearizer); + namespace { /// FastPriorityQueue - A degenerate priority queue that considers @@ -331,7 +336,9 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } } if (isNewLoad) { - AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + SDep D(LoadSU, SDep::Barrier); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); } ++NumUnfolds; @@ -407,9 +414,12 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { RemovePred(DelDeps[i].first, DelDeps[i].second); } - - AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); - AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); @@ -586,18 +596,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() { 
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -629,6 +635,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() { #endif } + +namespace { +//===----------------------------------------------------------------------===// +// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the +// DAG in topological order. +// IMPORTANT: this may not work for targets with phyreg dependency. +// +class ScheduleDAGLinearize : public ScheduleDAGSDNodes { +public: + ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + +private: + std::vector Sequence; + DenseMap GluedMap; // Cache glue to its user + + void ScheduleNode(SDNode *N); +}; +} // end anonymous namespace + +void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { + if (N->getNodeId() != 0) + llvm_unreachable(0); + + if (!N->isMachineOpcode() && + (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) + // These nodes do not need to be translated into MIs. + return; + + DEBUG(dbgs() << "\n*** Scheduling: "); + DEBUG(N->dump(DAG)); + Sequence.push_back(N); + + unsigned NumOps = N->getNumOperands(); + if (unsigned NumLeft = NumOps) { + SDNode *GluedOpN = 0; + do { + const SDValue &Op = N->getOperand(NumLeft-1); + SDNode *OpN = Op.getNode(); + + if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { + // Schedule glue operand right above N. + GluedOpN = OpN; + assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); + OpN->setNodeId(0); + ScheduleNode(OpN); + continue; + } + + if (OpN == GluedOpN) + // Glue operand is already scheduled. + continue; + + DenseMap::iterator DI = GluedMap.find(OpN); + if (DI != GluedMap.end() && DI->second != N) + // Users of glues are counted against the glued users. + OpN = DI->second; + + unsigned Degree = OpN->getNodeId(); + assert(Degree > 0 && "Predecessor over-released!"); + OpN->setNodeId(--Degree); + if (Degree == 0) + ScheduleNode(OpN); + } while (--NumLeft); + } +} + +/// findGluedUser - Find the representative use of a glue value by walking +/// the use chain. +static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector Glues; + unsigned DAGSize = 0; + for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), + E = DAG->allnodes_end(); I != E; ++I) { + SDNode *N = I; + + // Use node id to record degree. 
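// [Editor's aside; not part of the patch.] Standalone model of the
// linearizer's core trick: Pending starts as the node's user count, a
// node is emitted once its last user has been emitted, and the sequence
// is later read backwards so operands precede users (glue omitted):
#include <vector>
struct Node { std::vector<Node*> Ops; unsigned Pending; };

void schedule(Node *N, std::vector<Node*> &Seq) {
  Seq.push_back(N);          // users are appended before their operands
  for (Node *Op : N->Ops)
    if (--Op->Pending == 0)  // the last user was just emitted
      schedule(Op, Seq);
}
// Emission then walks Seq back to front, mirroring EmitSchedule below.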
+ unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); + ui != ue; ++ui) + if (*ui == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap VRBaseMap; + + DEBUG({ + dbgs() << "\n*** Final schedule ***\n"; + }); + + // FIXME: Handle dbg_values. + unsigned NumNodes = Sequence.size(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + } + + DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -637,3 +792,8 @@ llvm::ScheduleDAGSDNodes * llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { return new ScheduleDAGFast(*IS->MF); } + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index bf0a437..c554569 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { break; case ISD::MERGE_VALUES: case ISD::TokenFactor: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: case ISD::CopyToReg: case ISD::CopyFromReg: case ISD::EH_LABEL: @@ -1056,7 +1058,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // Add a data dependency to reflect that NewSU reads the value defined // by LoadSU. - AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); if (isNewLoad) AvailableQueue->addNode(LoadSU); @@ -1138,17 +1142,18 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Avoid scheduling the def-side copy before other successors. 
Otherwise // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. - SDep D(CopyFromSU, SDep::Order, /*Latency=*/0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true); - AddPred(SuccSU, D); + AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); - AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); AvailableQueue->updateNode(SU); AvailableQueue->addNode(CopyFromSU); @@ -1357,9 +1362,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } - AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current // node is no longer avaialable. Schedule a successor that's now @@ -1411,20 +1414,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -1756,6 +1753,7 @@ public: return V; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds. 
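// [Editor's aside on the new guard above; not part of the patch.] The #if
// keeps dump routines out of release binaries unless LLVM_ENABLE_DUMP
// requests them; callers sit behind the same guard, so nothing dangles:
#include <cstdio>
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void debugDump() { std::puts("debug-only output"); } // illustrative only
#endif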
std::vector DumpQueue = Queue; @@ -1766,6 +1764,7 @@ public: SU->dump(DAG); } } +#endif }; typedef RegReductionPriorityQueue @@ -1893,6 +1892,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; @@ -1902,6 +1902,7 @@ void RegReductionPQBase::dumpRegPressure() const { DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] << '\n'); } +#endif } bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { @@ -2930,10 +2931,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { !scheduleDAG->IsReachable(SuccSU, SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); } } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 748668c..a197fcb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -485,14 +485,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if(isChain && OpN->getOpcode() == ISD::TokenFactor) OpLatency = 0; - const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpLatency, PhysReg); + SDep Dep = isChain ? SDep(OpSU, SDep::Barrier) + : SDep(OpSU, SDep::Data, PhysReg); + Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { - computeOperandLatency(OpN, N, i, const_cast(dep)); - ST.adjustSchedDependency(OpSU, SU, const_cast(dep)); + computeOperandLatency(OpN, N, i, Dep); + ST.adjustSchedDependency(OpSU, SU, Dep); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { + if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. 
Register pressure tracking sees this as @@ -643,6 +644,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -659,8 +661,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { dbgs() << "\n"; GluedNodes.pop_back(); } +#endif } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) @@ -669,6 +673,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif #ifndef NDEBUG /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that @@ -827,8 +832,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } SmallVector GluedNodes; - for (SDNode *N = SU->getNode()->getGluedNode(); N; - N = N->getGluedNode()) + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 84e41fc..907356f 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -114,7 +114,8 @@ namespace llvm { /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock /// according to the order specified in Sequence. /// - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual MachineBasicBlock* + EmitSchedule(MachineBasicBlock::iterator &InsertPos); virtual void dumpNode(const SUnit *SU) const; @@ -158,6 +159,12 @@ namespace llvm { void InitNodeNumDefs(); }; + protected: + /// ForceUnitLatencies - Return true if all scheduling edges should be given + /// a latency value of one. The default is to return false; schedulers may + /// override this as needed. + virtual bool forceUnitLatencies() const { return false; } + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index c851291..30f03ac 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f4fe892..f000ce3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" @@ -91,11 +91,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); - // PPC long double cannot be converted to any other type. 
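// [Editor's aside on the ScheduleDAGSDNodes.h hunks above; not part of
// the patch.] Making EmitSchedule virtual lets the new linearizer swap in
// its own emission loop, and forceUnitLatencies() gives subclasses a hook
// to flatten edge latencies. Stand-in shape, not the LLVM classes:
struct SchedulerBase {
  virtual ~SchedulerBase() {}
  virtual bool forceUnitLatencies() const { return false; } // default
};
struct FlatScheduler : SchedulerBase {
  bool forceUnitLatencies() const { return true; } // every edge counts 1
};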
- if (VT == MVT::ppcf128 || - &Val.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; @@ -136,13 +131,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // constants are. SDValue NotZero = N->getOperand(i); unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - if (isa(NotZero)) { - if (cast(NotZero)->getAPIntValue().countTrailingOnes() < - EltSize) + if (ConstantSDNode *CN = dyn_cast(NotZero)) { + if (CN->getAPIntValue().countTrailingOnes() < EltSize) return false; - } else if (isa(NotZero)) { - if (cast(NotZero)->getValueAPF() - .bitcastToAPInt().countTrailingOnes() < EltSize) + } else if (ConstantFPSDNode *CFPN = dyn_cast(NotZero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; @@ -179,11 +172,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Do not accept build_vectors that aren't all constants or which have non-0 // elements. SDValue Zero = N->getOperand(i); - if (isa(Zero)) { - if (!cast(Zero)->isNullValue()) + if (ConstantSDNode *CN = dyn_cast(Zero)) { + if (!CN->isNullValue()) return false; - } else if (isa(Zero)) { - if (!cast(Zero)->getValueAPF().isPosZero()) + } else if (ConstantFPSDNode *CFPN = dyn_cast(Zero)) { + if (!CFPN->getValueAPF().isPosZero()) return false; } else return false; @@ -494,8 +487,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::TargetBlockAddress: case ISD::BlockAddress: { - ID.AddPointer(cast(N)->getBlockAddress()); - ID.AddInteger(cast(N)->getTargetFlags()); + const BlockAddressSDNode *BA = cast(N); + ID.AddPointer(BA->getBlockAddress()); + ID.AddInteger(BA->getOffset()); + ID.AddInteger(BA->getTargetFlags()); break; } } // end switch (N->getOpcode()) @@ -883,7 +878,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getTargetData()->getABITypeAlignment(Ty); + return TLI.getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... @@ -1097,10 +1092,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. - EVT PTy = TLI.getPointerTy(); - unsigned BitWidth = PTy.getSizeInBits(); + unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); if (BitWidth < 64) - Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + Offset = SignExtend64(Offset, BitWidth); const GlobalVariable *GVar = dyn_cast(GV); if (!GVar) { @@ -1174,7 +1168,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1201,7 +1195,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1471,6 +1465,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, bool isTarget, unsigned char TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; @@ -1478,12 +1473,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddPointer(BA); + ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, + TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1542,7 +1539,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); @@ -1555,7 +1552,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1610,10 +1607,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } if (ConstantFPSDNode *N1C = dyn_cast(N1.getNode())) { if (ConstantFPSDNode *N2C = dyn_cast(N2.getNode())) { - // No compile time operations on this type yet. - if (N1C->getValueType(0) == MVT::ppcf128) - return SDValue(); - APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); switch (Cond) { default: break; @@ -2445,8 +2438,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - // No compile time operations on ppcf128. 
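// [Editor's aside on the BlockAddress hunks above; not part of the patch.]
// Every field that distinguishes two nodes must feed the CSE key, or
// nodes differing only in that field wrongly fold together. Illustrative
// stand-in for the FoldingSet profile, now including the offset:
#include <cstddef>
#include <cstdint>
#include <functional>
std::size_t profileBlockAddr(const void *BA, int64_t Off, unsigned Flags) {
  std::size_t h = std::hash<const void *>()(BA);
  h = h * 131 + std::hash<int64_t>()(Off); // the newly added key field
  return h * 131 + Flags;
}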
- if (VT == MVT::ppcf128) break; APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, @@ -2455,9 +2446,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(Val.bitsToFloat(), VT); + return getConstantFP(APFloat(Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(Val.bitsToDouble(), VT); + return getConstantFP(APFloat(Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2475,61 +2466,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, // Constant fold unary operations with a floating point constant operand. if (ConstantFPSDNode *C = dyn_cast(Operand.getNode())) { APFloat V = C->getValueAPF(); // make copy - if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { - switch (Opcode) { - case ISD::FNEG: - V.changeSign(); + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FABS: - V.clearSign(); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FCEIL: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FTRUNC: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FFLOOR: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FP_EXTEND: { - bool ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &ignored); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - } - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: { - integerPart x[2]; - bool ignored; - assert(integerPartWidth >= 64); - // FIXME need to be more flexible about rounding mode. - APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), - Opcode==ISD::FP_TO_SINT, - APFloat::rmTowardZero, &ignored); - if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual - break; - APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); - } - case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); - else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. 
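// [Editor's aside on the FCEIL/FTRUNC/FFLOOR folding above; not part of
// the patch, and assuming the APFloat interface of this vintage.] The
// shape of the fold: round in software, accept only clean statuses:
#include "llvm/ADT/APFloat.h"
bool foldCeil(llvm::APFloat V, llvm::APFloat &Out) {
  llvm::APFloat::opStatus fs =
      V.roundToIntegral(llvm::APFloat::rmTowardPositive);
  if (fs != llvm::APFloat::opOK && fs != llvm::APFloat::opInexact)
    return false; // anything else: leave the node for later passes
  Out = V;        // opInexact just means fraction bits were dropped
  return true;
}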
+ (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; - } + APInt api(VT.getSizeInBits(), x); + return getConstant(api, VT); + } + case ISD::BITCAST: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; } } @@ -2817,6 +2806,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (ConstantFPSDNode *CFP = dyn_cast(N2)) if (CFP->getValueAPF().isZero()) return N1; + } else if (Opcode == ISD::FMUL) { + ConstantFPSDNode *CFP = dyn_cast(N1); + SDValue V = N2; + + // If the first operand isn't the constant, try the second + if (!CFP) { + CFP = dyn_cast(N2); + V = N1; + } + + if (CFP) { + // 0*x --> 0 + if (CFP->isZero()) + return SDValue(CFP,0); + // 1*x --> x + if (CFP->isExactlyValue(1.0)) + return V; + } } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -2935,17 +2942,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - EVT VEltTy = N1.getValueType().getVectorElementType(); - if (Elt.getValueType() != VEltTy) { + + if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands - // are promoted and implicitly truncated. Make that explicit here. - Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); - } - if (VT != VEltTy) { - // If the vector element type is not legal, the EXTRACT_VECTOR_ELT - // result is implicitly extended. - Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); - } + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + return Elt; } @@ -3036,7 +3039,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Cannonicalize constant to RHS if commutative std::swap(N1CFP, N2CFP); std::swap(N1, N2); - } else if (N2CFP && VT != MVT::ppcf128) { + } else if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3435,7 +3438,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || TLI.allowsUnalignedMemoryAccesses(VT)) { VT = TLI.getPointerTy(); } else { @@ -3503,7 +3506,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = + MF.getFunction()->getFnAttributes(). 
+ hasAttribute(Attributes::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3523,7 +3528,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3596,7 +3601,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3612,7 +3618,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3674,7 +3680,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3687,7 +3694,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3781,7 +3788,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3836,7 +3843,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. 
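// [Editor's aside; not part of the patch, names illustrative.] Roughly
// when this libcall path is taken instead of inline loads and stores:
bool useLibcall(bool sizeKnown, unsigned numOps, unsigned limit,
                bool alwaysInline) {
  if (alwaysInline)
    return false;                      // caller demanded inline expansion
  return !sizeKnown || numOps > limit; // the limit shrinks under -Os
}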
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3885,7 +3892,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. - Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -3923,17 +3930,21 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + // All atomics are load and store, except for ATOMIC_LOAD and ATOMIC_STORE. // For now, atomics are considered to be volatile always. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -3983,17 +3994,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic store does not load; a release store "loads" in the sense - // that other stores cannot be sunk past it. + // An atomic store does not load. An atomic load does not store. // (An atomicrmw obviously both loads and stores.) - unsigned Flags = MachineMemOperand::MOStore; - if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic) - Flags |= MachineMemOperand::MOLoad; - - // For now, atomics are considered to be volatile always. + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4056,16 +4067,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic load does not store; an acquire load "stores" in the sense - // that other loads cannot be hoisted past it. - unsigned Flags = MachineMemOperand::MOLoad; - if (Ordering > Monotonic) - Flags |= MachineMemOperand::MOStore; - - // For now, atomics are considered to be volatile always. + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4157,6 +4169,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -4226,7 +4240,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4284,7 +4298,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal(), + MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; @@ -4303,7 +4317,7 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, + bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4332,7 +4346,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), false, LD->getAlignment()); } @@ -4340,7 +4354,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4365,7 +4379,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); @@ -4394,7 +4408,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + 
assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4421,7 +4435,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); @@ -6074,7 +6088,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast(GV), KnownZero, KnownOne, - TLI.getTargetData()); + TLI.getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ba5bd79..3fbf7c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DebugInfo.h" @@ -43,7 +44,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -88,7 +89,7 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT); + EVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type @@ -98,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, + const Value *V, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) - return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -124,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, - PartVT, HalfVT); + PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, - RoundParts / 2, PartVT, HalfVT); + RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); @@ -142,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, - Parts + RoundParts, OddParts, PartVT, OddVT); + Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. Lo = Val; @@ -171,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); } } @@ -209,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, llvm_unreachable("Unknown mismatch!"); } -/// getCopyFromParts - Create a value that contains the specified legal parts -/// combined into the value they represent. If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra -/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT -/// (ISD::AssertSext). +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger than ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT) { + EVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -242,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts.
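getCopyFromParts above assembles a value from its legal parts by splitting the part count into a power-of-two prefix, assembled recursively as two halves, and an odd remainder assembled separately; the two results are then combined. A standalone sketch of that split (names are illustrative):

    // Largest power of two not exceeding NumParts; the remainder is the
    // "odd" tail that gets its own recursive assembly.
    unsigned roundParts(unsigned NumParts) {
      unsigned R = 1;
      while (R * 2 <= NumParts)
        R *= 2;
      return R; // e.g. NumParts == 6 gives RoundParts == 4, OddParts == 2
    }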
@@ -251,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -299,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); + if (ValueVT.getVectorNumElements() != 1) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("non-trivial scalar-to-vector conversion"); + if (const Instruction *I = dyn_cast_or_null(V)) { + if (const CallInst *CI = dyn_cast(I)) + if (isa(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + report_fatal_error("Cannot handle scalar-to-vector conversion!"); + } if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartVT) { @@ -312,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } - - - static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT); + EVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, + EVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); @@ -382,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); + if (PartVT != ValueVT) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("scalar-to-vector conversion failed"); + if (const Instruction *I = dyn_cast_or_null(V)) { + if (const CallInst *CI = dyn_cast(I)) + if (isa(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + } + Parts[0] = Val; return; } @@ -397,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits)); - getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. @@ -443,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// value split into legal parts. 
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT) { + EVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -529,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -537,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } - - - namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -621,14 +641,15 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, + const Value *V = 0) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -647,7 +668,8 @@ namespace { SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -721,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); + NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } @@ -736,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Get the list of the values's legal parts. 
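The const Value *V parameter threaded through getCopyFromParts, getCopyToParts, and their vector variants above exists purely for diagnostics: when a conversion between parts fails, the error can now name the offending IR instruction, with an extra hint when it is an inline-asm call whose constraint is the likely culprit. A reduced sketch of that reporting pattern (the types are stand-ins, not LLVM's):

    #include <string>
    struct Inst { bool IsInlineAsmCall; };
    std::string conversionError(const Inst *I) {
      std::string Msg = "scalar-to-vector conversion failed";
      // A call to inline asm suggests a bad constraint, so say so.
      if (I && I->IsInlineAsmCall)
        Msg += ", possible invalid constraint for vector type";
      return Msg;
    }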
@@ -748,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); + &Parts[Part], NumParts, RegisterVT, V); Part += NumParts; } @@ -824,7 +847,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getTargetData(); + TD = DAG.getTarget().getDataLayout(); + Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -992,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1147,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); @@ -1203,9 +1227,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ISD::NodeType ExtendKind = ISD::ANY_EXTEND; const Function *F = I.getParent()->getParent(); - if (F->paramHasAttr(0, Attribute::SExt)) + if (F->getRetAttributes().hasAttribute(Attributes::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (F->getRetAttributes().hasAttribute(Attributes::ZExt)) ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) @@ -1216,11 +1240,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->paramHasAttr(0, Attribute::InReg)) + if (F->getRetAttributes().hasAttribute(Attributes::InReg)) Flags.setInReg(); // Propagate extension type if any @@ -1231,7 +1255,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true)); + /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1601,7 +1625,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Update successor info addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
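The visitRet hunk above also shows the new attribute-query style: the extension kind for the return value follows the signext/zeroext return attributes. A minimal sketch of that selection, with an illustrative enum and struct standing in for the ISD and Attributes types:

    enum ExtKind { ANY_EXTEND, SIGN_EXTEND, ZERO_EXTEND };
    struct RetAttrs { bool SExt; bool ZExt; };
    ExtKind chooseExtendKind(const RetAttrs &A) {
      if (A.SExt) return SIGN_EXTEND; // 'signext' on the return value
      if (A.ZExt) return ZERO_EXTEND; // 'zeroext' on the return value
      return ANY_EXTEND;              // neither attribute: extend freely
    }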
@@ -1762,6 +1789,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1799,8 +1827,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - addSuccessorWithWeight(SwitchBB, B.TargetBB); - addSuccessorWithWeight(SwitchBB, NextMBB); + // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. + addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); + // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. + addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1923,6 +1953,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: @@ -1956,8 +1987,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - addSuccessorWithWeight(SwitchBB, Small.BB); - addSuccessorWithWeight(SwitchBB, Default); + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchBB, Small.BB, + Small.ExtraWeight + Big.ExtraWeight); + addSuccessorWithWeight(SwitchBB, Default, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1975,14 +2010,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Order cases by weight so the most likely case will be checked first. - BranchProbabilityInfo *BPI = FuncInfo.BPI; + uint32_t UnhandledWeights = 0; if (BPI) { for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - I->BB->getBasicBlock()); + uint32_t IWeight = I->ExtraWeight; + UnhandledWeights += IWeight; for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - J->BB->getBasicBlock()); + uint32_t JWeight = J->ExtraWeight; if (IWeight > JWeight) std::swap(*I, *J); } @@ -2031,10 +2065,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, LHS = I->Low; MHS = SV; RHS = I->High; } - uint32_t ExtraWeight = I->ExtraWeight; + // The false weight should be sum of all un-handled cases. + UnhandledWeights -= I->ExtraWeight; CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, /* me */ CurBlock, - /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); + /* trueweight */ I->ExtraWeight, + /* falseweight */ UnhandledWeights); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. 
Otherwise, push the CaseBlock onto the @@ -2079,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(4)) + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2134,13 +2170,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } } + // Calculate weight for each unique destination in CR. + DenseMap DestWeights; + if (FuncInfo.BPI) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + DenseMap::iterator Itr = + DestWeights.find(I->BB); + if (Itr != DestWeights.end()) + Itr->second += I->ExtraWeight; + else + DestWeights[I->BB] = I->ExtraWeight; + } + // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); for (std::vector::iterator I = DestBBs.begin(), E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - addSuccessorWithWeight(JumpTableBB, *I); + DenseMap::iterator Itr = + DestWeights.find(*I); + addSuccessorWithWeight(JumpTableBB, *I, + Itr != DestWeights.end() ? Itr->second : 0); } } @@ -2371,7 +2422,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, if (i == count) { assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0)); + CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); count++; } @@ -2380,6 +2431,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); + CasesBits[i].ExtraWeight += I->ExtraWeight; for (uint64_t j = lo; j <= hi; j++) { CasesBits[i].Mask |= 1ULL << j; @@ -2407,7 +2459,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CurMF->insert(BBI, CaseBB); BTC.push_back(BitTestCase(CasesBits[i].Mask, CaseBB, - CasesBits[i].BB)); + CasesBits[i].BB, CasesBits[i].ExtraWeight)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); @@ -2435,30 +2487,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, Clusterifier TheClusterifier; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB); + TheClusterifier.add(i.getCaseValueEx(), SMBB, + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } TheClusterifier.optimize(); - BranchProbabilityInfo *BPI = FuncInfo.BPI; size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { Clusterifier::Cluster &C = *i; - unsigned W = 0; - if (BPI) { - W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock()); - if (!W) - W = 16; - W *= C.first.Weight; - BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W); - } + // Update edge weight for the cluster. + unsigned W = C.first.Weight; // FIXME: Currently work with ConstantInt based numbers. // Changing it to APInt based is a pretty heavy for this commit. 
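The handleSmallSwitchRange changes above stop querying BranchProbabilityInfo per edge and instead use the weight carried on each case: cases are sorted so the heaviest is tested first, and each emitted comparison's false edge gets the summed weight of all cases not yet handled. A standalone sketch of that bookkeeping:

    #include <algorithm>
    #include <vector>
    struct Case { unsigned Weight; /* low/high/destination elided */ };
    void emitComparisons(std::vector<Case> &Cases) {
      unsigned Unhandled = 0;
      for (const Case &C : Cases)
        Unhandled += C.Weight;          // total weight of every case
      // Heaviest first, so the hottest value needs the fewest compares.
      std::sort(Cases.begin(), Cases.end(),
                [](const Case &A, const Case &B) { return A.Weight > B.Weight; });
      for (const Case &C : Cases) {
        Unhandled -= C.Weight;
        // emit the compare for C: true weight = C.Weight,
        // false (fall-through) weight = Unhandled
      }
    }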
@@ -2540,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) continue; - // If the switch has more than 5 blocks, and at least 40% dense, and the + // If the switch has more than N blocks, and is at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. + // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries(). if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; @@ -2556,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. - SmallVector succs; - succs.reserve(I.getNumSuccessors()); - for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - succs.push_back(I.getSuccessor(i)); - array_pod_sort(succs.begin(), succs.end()); - succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + SmallSet Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB); + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; addSuccessorWithWeight(IndirectBrMBB, Succ); } @@ -3160,9 +3208,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); @@ -3460,7 +3508,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3490,7 +3538,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic store"); @@ -4352,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttr(Attribute::OptimizeForSize) || + if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies.
CountPopulation_32(Val)+Log2_32(Val) < 7) { @@ -4850,7 +4898,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, MVT::i32)); + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vextractf128_pd_256: + case Intrinsic::x86_avx_vextractf128_ps_256: + case Intrinsic::x86_avx_vextractf128_si_256: + case Intrinsic::x86_avx2_vextracti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + uint64_t Idx = (cast(I.getArgOperand(1))->getZExtValue() & 1) * + DestVT.getVectorNumElements(); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + DAG.getIntPtrConstant(Idx)); setValue(&I, Res); return 0; } @@ -5113,10 +5175,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } + case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::TRAP : ISD::DEBUGTRAP; + DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); return 0; } TargetLowering::ArgListTy Args; @@ -5131,10 +5196,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Result.second); return 0; } - case Intrinsic::debugtrap: { - DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot())); - return 0; - } + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: @@ -5177,14 +5239,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { rw==1)); /* write */ return 0; } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + // Stack coloring is not enabled in O0, discard region information. + if (TM.getOptLevel() == CodeGenOpt::None) + return 0; + + SmallVector Allocas; + GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + + for (SmallVector::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { + AllocaInst *LifetimeObject = dyn_cast_or_null(*Object); + // Could not find an Alloca. + if (!LifetimeObject) + continue; + + int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + + Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + DAG.setRoot(Res); + } + } case Intrinsic::invariant_start: - case Intrinsic::lifetime_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: - case Intrinsic::lifetime_end: // Discard region information. 
return 0; case Intrinsic::donothing: @@ -5220,9 +5308,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); @@ -5254,12 +5342,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.isSExt = CS.paramHasAttr(attrInd, Attributes::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attributes::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attributes::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attributes::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal); Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -5687,7 +5775,7 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const TargetData *TD) const { + const DataLayout *TD) const { if (CallOperandVal == 0) return MVT::Other; if (isa(CallOperandVal)) @@ -5991,8 +6079,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); @@ -6040,12 +6128,36 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the HasSideEffect and AlignStack bits as operand 3. + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + // bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + // Set the asm dialect. + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // Ideally, we would only check against memory constraints. 
However, the + // meaning of an other constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for other constriants as well. + if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + ExtraInfo |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + ExtraInfo |= InlineAsm::Extra_MayStore; + } + } + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); @@ -6155,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); @@ -6237,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); @@ -6268,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -6308,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6338,7 +6450,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), @@ -6384,7 +6496,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = - getTargetData()->getABITypeAlignment(ArgTy); + getDataLayout()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6398,7 +6510,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setByVal(); PointerType *Ty = cast(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; @@ -6423,12 +6535,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, ExtendKind); + PartVT, CLI.CS ? 
CLI.CS->getInstruction() : 0, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), - i < CLI.NumFixedArgs); + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) @@ -6504,7 +6617,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, + NumRegs, RegisterVT, VT, NULL, AssertOp)); CurReg += NumRegs; } @@ -6543,7 +6656,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6573,7 +6686,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; DebugLoc dl = SDB->getCurDebugLoc(); - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); SmallVector Ins; // Check whether the function can return without sret-demotion. @@ -6591,7 +6704,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { ISD::ArgFlagsTy Flags; Flags.setSRet(); EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true); + ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); Ins.push_back(RetArg); } @@ -6610,15 +6723,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); - if (F.paramHasAttr(Idx, Attribute::ZExt)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt)) Flags.setZExt(); - if (F.paramHasAttr(Idx, Attribute::SExt)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) Flags.setSExt(); - if (F.paramHasAttr(Idx, Attribute::InReg)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg)) Flags.setInReg(); - if (F.paramHasAttr(Idx, Attribute::StructRet)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet)) Flags.setSRet(); - if (F.paramHasAttr(Idx, Attribute::ByVal)) { + if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) { Flags.setByVal(); PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -6632,14 +6745,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } - if (F.paramHasAttr(Idx, Attribute::Nest)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, + Idx-1, i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6685,7 +6799,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = 
ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, AssertOp); + RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -6719,14 +6833,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.paramHasAttr(Idx, Attribute::SExt)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) AssertOp = ISD::AssertSext; - else if (F.paramHasAttr(Idx, Attribute::ZExt)) + else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - AssertOp)); + NULL, AssertOp)); } i += NumParts; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 4090002..9e46d96 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -66,7 +66,7 @@ class ShuffleVectorInst; class SIToFPInst; class StoreInst; class SwitchInst; -class TargetData; +class DataLayout; class TargetLibraryInfo; class TargetLowering; class TruncInst; @@ -150,9 +150,11 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; + uint32_t ExtraWeight; - CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): - Mask(mask), BB(bb), Bits(bits) { } + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + uint32_t Weight): + Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } }; typedef std::vector CaseVector; @@ -247,11 +249,13 @@ private: typedef std::pair JumpTableBlock; struct BitTestCase { - BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): - Mask(M), ThisBB(T), TargetBB(Tr) { } + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, + uint32_t Weight): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; + uint32_t ExtraWeight; }; typedef SmallVector BitTestInfo; @@ -281,7 +285,7 @@ public: const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - const TargetData *TD; + const DataLayout *TD; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -325,7 +329,7 @@ public: CodeGenOpt::Level ol) : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), - HasTailCall(false), Context(dag.getContext()) { + HasTailCall(false) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa, @@ -452,6 +456,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 13cd011..6f3ce7a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -331,7 +333,7 @@ void SDNode::dump(const SelectionDAG *G) const { } void 
SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; + OS << (const void*)this << ": "; for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; @@ -473,11 +475,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "<" << *M->getMemOperand() << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast(this)) { + int64_t offset = BA->getOffset(); OS << "<"; WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); OS << ", "; WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; } @@ -559,7 +566,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, child->printr(OS, G); once.insert(child); } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; + OS << (const void*)child; if (unsigned RN = N->getOperand(i).getResNo()) OS << ":" << RN; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4e5e3ba..c314fa5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } + // Freeze the set of reserved registers now that MachineFrameInfo has been + // set up. All the information required by getReservedRegs() should be + // available now. + MRI.freezeReservedRegs(*MF); + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -554,7 +559,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getName().str() + ":" + + BlockName = MF->getName().str() + ":" + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber @@ -1209,7 +1214,12 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } + uint32_t UnhandledWeight = 0; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) + UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); @@ -1217,12 +1227,14 @@ SelectionDAGISel::FinishBasicBlock() { if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Cases[j+1].ThisBB, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Default, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -1794,10 +1806,13 @@ WalkChainUsers(const SDNode *ChainedNode, User->getOpcode() == ISD::HANDLENODE) // Root of the graph. 
continue; - if (User->getOpcode() == ISD::CopyToReg || - User->getOpcode() == ISD::CopyFromReg || - User->getOpcode() == ISD::INLINEASM || - User->getOpcode() == ISD::EH_LABEL) { + unsigned UserOpcode = User->getOpcode(); + if (UserOpcode == ISD::CopyToReg || + UserOpcode == ISD::CopyFromReg || + UserOpcode == ISD::INLINEASM || + UserOpcode == ISD::EH_LABEL || + UserOpcode == ISD::LIFETIME_START || + UserOpcode == ISD::LIFETIME_END) { // If their node ID got reset to -1 then they've already been selected. // Treat them like a MachineOpcode. if (User->getNodeId() == -1) @@ -1994,7 +2009,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, return Res; } -/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +/// CheckSame - Implements OP_CheckSame. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, @@ -2213,6 +2228,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. return 0; case ISD::AssertSext: @@ -2981,7 +2998,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); - Msg << "\nIn function: " << MF->getFunction()->getName(); + Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 173ffac..3921635 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -50,7 +49,7 @@ namespace llvm { template static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { - return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -73,7 +72,7 @@ namespace llvm { } static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getFunction()->getName(); + return G->getMachineFunction().getName(); } static bool renderGraphFromBottomUp() { @@ -146,7 +145,7 @@ std::string DOTGraphTraits::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + ViewGraph(this, "dag." 
+ getMachineFunction().getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6820175..49f55e2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -14,7 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -515,7 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -583,8 +583,13 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. + // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; @@ -613,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; SupportJumpTables = true; + MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -624,7 +630,7 @@ TargetLowering::~TargetLowering() { } MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize()); + return MVT::getIntegerVT(8*TD->getPointerSize(0)); } /// canOpTrap - Returns true if the operation can trap for the value type. 
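Defaulting ISD::DEBUGTRAP to Expand in the TargetLowering constructor above means a target that never selects the node still gets a working llvm.debugtrap: the legalizer degrades it to the ordinary ISD::TRAP. A reduced sketch of that fallback (enums illustrative):

    enum NodeType { TRAP, DEBUGTRAP };
    enum LegalizeAction { Legal, Expand };
    NodeType legalizeTrap(NodeType N, LegalizeAction DebugTrapAction) {
      // On most systems the two traps behave identically, so Expand
      // simply rewrites the debug variant into the plain one.
      if (N == DEBUGTRAP && DebugTrapAction == Expand)
        return TRAP;
      return N;
    }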
@@ -772,7 +778,7 @@ void TargetLowering::computeRegisterProperties() { LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (MVT::SimpleValueType)LegalIntReg; + (const MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -898,10 +904,9 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } - EVT TargetLowering::getSetCCResultType(EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); - return PointerTy.SimpleTy; + return getPointerTy(0).SimpleTy; } MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { @@ -997,9 +1002,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (attr & Attribute::SExt) + if (attr.hasAttribute(Attributes::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) + else if (attr.hasAttribute(Attributes::ZExt)) ExtendKind = ISD::ZERO_EXTEND; // FIXME: C calling convention requires the return type to be promoted to @@ -1017,18 +1022,17 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) + if (attr.hasAttribute(Attributes::InReg)) Flags.setInReg(); // Propagate extension type if any - if (attr & Attribute::SExt) + if (attr.hasAttribute(Attributes::SExt)) Flags.setSExt(); - else if (attr & Attribute::ZExt) + else if (attr.hasAttribute(Attributes::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) { - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); - } + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); } } @@ -1062,7 +1066,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0)); return Table; } @@ -2441,7 +2445,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. 
- uint64_t EqVal; + uint64_t EqVal = 0; switch (getBooleanContents(N0.getValueType().isVector())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: @@ -2954,8 +2958,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); break; } - } else if (dyn_cast(OpTy)) { - OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize()); + } else if (PointerType *PT = dyn_cast(OpTy)) { + OpInfo.ConstraintVT = MVT::getIntegerVT( + 8*TD->getPointerSize(PT->getAddressSpace())); } else { OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); } diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index a081e3c..f769b44 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : TD(TM.getTargetData()) { + : TD(TM.getDataLayout()) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 21ae2f5..4fbe1b3 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -159,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=. #ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName().str(); + std::string MFName = MF->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -187,7 +187,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapThisFunction) { dbgs() << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + << MF->getName() << "\n"; }); if (calculateSets(Fn)) @@ -364,7 +364,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": uses no callee-saved registers\n"); return false; } @@ -384,7 +384,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. 
if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -466,7 +466,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { @@ -478,7 +478,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } @@ -505,7 +505,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; @@ -521,7 +521,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - dbgs() << "ENABLED: " << Fn.getFunction()->getName(); + dbgs() << "ENABLED: " << Fn.getName(); if (HasFastExitPath) dbgs() << " (fast exit path)"; dbgs() << "\n"; @@ -861,7 +861,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { dbgs() << "-----------------------------------------------------------\n"; dbgs() << "total iterations = " << iterations << " ( " - << Fn.getFunction()->getName() + << Fn.getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; @@ -984,7 +984,7 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) << " are never restored on path to return " @@ -1032,7 +1032,7 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 980bd74..4b566fc 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -191,58 +191,43 @@ setupFunctionContext(Function &F, ArrayRef LPads) { // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. 
unsigned Align = - TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy); + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - Value *One = ConstantInt::get(Int32Ty, 1); - Value *Two = ConstantInt::get(Int32Ty, 2); - Value *Three = ConstantInt::get(Int32Ty, 3); - Value *Four = ConstantInt::get(Int32Ty, 4); - - Value *Idxs[2] = { Zero, 0 }; - for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Idxs[1] = Two; - Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Idxs[1] = Zero; - Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, + "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); - ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); + ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Idxs[1] = One; - Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, + "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = Three; + IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, + "pers_fn_gep"); + Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - Idxs[1] = Four; - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); + Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; } @@ -417,48 +402,31 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - - Value *Idxs[2] = { - ConstantInt::get(Int32Ty, 0), 0 - }; + IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep", - EntryBB->getTerminator()); + Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. 
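// Aside (illustrative, not patch content): CreateConstGEP2_32(Ptr, 0, N) is
// shorthand for the two-index GEP assembled by hand in the removed lines;
// both forms emit "getelementptr %ctx, i32 0, i32 N". Equivalent calls:
//
//   Value *Idxs[2] = { Builder.getInt32(0), Builder.getInt32(5) };
//   Builder.CreateGEP(FuncCtx, Idxs, "jbuf_gep");           // long form
//   Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");  // shorthand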
-  Idxs[1] = ConstantInt::get(Int32Ty, 0);
-  Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
-                                              EntryBB->getTerminator());
+  Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
 
-  Value *Val = CallInst::Create(FrameAddrFn,
-                                ConstantInt::get(Int32Ty, 0),
-                                "fp",
-                                EntryBB->getTerminator());
-  new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
 
   // Save the stack pointer.
-  Idxs[1] = ConstantInt::get(Int32Ty, 2);
-  Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
-                                              EntryBB->getTerminator());
+  Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
 
-  Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
-  new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+  Val = Builder.CreateCall(StackAddrFn, "sp");
+  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
 
   // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
-  Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr,
-                                      Type::getInt8PtrTy(F.getContext()), "",
-                                      EntryBB->getTerminator());
-  CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator());
+  Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
+  Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
 
   // Store a pointer to the function context so that the back-end will know
   // where to look for it.
-  Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx,
-                                       Type::getInt8PtrTy(F.getContext()), "",
-                                       EntryBB->getTerminator());
-  CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator());
+  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+  Builder.CreateCall(FuncCtxFn, FuncCtxArg);
 
   // At this point, we are all set up, update the invoke instructions to mark
   // their call_site values.
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index c8c3fb3..95faafab 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -143,6 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void SlotIndexes::dump() const {
   for (IndexList::const_iterator itr = indexList.begin();
        itr != indexList.end(); ++itr) {
@@ -159,6 +160,7 @@ void SlotIndexes::dump() const {
     dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
            << MBBRanges[i].second << ")\n";
 }
+#endif
 
 // Print a SlotIndex to a raw_ostream.
 void SlotIndex::print(raw_ostream &os) const {
@@ -168,9 +170,11 @@ void SlotIndex::print(raw_ostream &os) const {
     os << "invalid";
 }
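These dump() hunks all apply one idiom: the out-of-line dump() bodies are compiled only when assertions are enabled or LLVM_ENABLE_DUMP is defined, while the corresponding print() routines stay available in every build mode. A minimal sketch of the pattern (the class is hypothetical; dbgs() is the real LLVM debug stream):

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MyIndex::dump() const {   // hypothetical class following the idiom
  print(dbgs());               // reuse the unconditionally-built printer
  dbgs() << '\n';
}
#endif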
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 // Dump a SlotIndex to stderr.
 void SlotIndex::dump() const {
   print(dbgs());
   dbgs() << "\n";
 }
+#endif
 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 4a2b7ec..dca15ee 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -356,6 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
   Edit->anyRematerializable(0);
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void SplitEditor::dump() const {
   if (RegAssign.empty()) {
     dbgs() << " empty\n";
@@ -366,6 +367,7 @@ void SplitEditor::dump() const {
     dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
   dbgs() << '\n';
 }
+#endif
 
 VNInfo *SplitEditor::defValue(unsigned RegIdx,
                               const VNInfo *ParentVNI,
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 0000000..1cbee84
--- /dev/null
+++ b/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,783 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+//    offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+//    spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "MachineTraceMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+                cl::init(false), cl::Hidden,
+                cl::desc("Disable stack coloring"));
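For orientation, this is the shape of source code the pass improves (illustrative C++, not taken from the patch): when optimizing, the front end brackets each scoped local with llvm.lifetime.start/end intrinsics, which reach the code generator as the LIFETIME_START/LIFETIME_END machine instructions collected below, and buffers with disjoint lifetimes can then share one stack slot.

void consume(char *buf);   // hypothetical callee

void demo() {
  {
    char a[4096];          // lifetime of 'a' ends at the inner brace
    consume(a);
  }
  {
    char b[4096];          // disjoint from 'a': may reuse its stack slot
    consume(b);
  }
}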
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+                          cl::init(false), cl::Hidden,
+                          cl::desc("Do not optimize lifetime zones that are broken"));
+
+STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas,
+          "Number of allocas that escaped the lifetime region");
+
+//===----------------------------------------------------------------------===//
+//                           StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+  MachineFrameInfo *MFI;
+  MachineFunction *MF;
+
+  /// A class representing liveness information for a single basic block.
+  /// Each bit in the BitVector represents the liveness property
+  /// for a different stack slot.
+  struct BlockLifetimeInfo {
+    /// Which slots BEGIN in each basic block.
+    BitVector Begin;
+    /// Which slots END in each basic block.
+    BitVector End;
+    /// Which slots are marked as LIVE_IN, coming into each basic block.
+    BitVector LiveIn;
+    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+    BitVector LiveOut;
+  };
+
+  /// Maps active slots (per bit) for each basic block.
+  DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness;
+
+  /// Maps basic blocks to a serial number.
+  DenseMap<MachineBasicBlock*, unsigned> BasicBlocks;
+  /// Maps serial numbers to basic blocks.
+  SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering;
+
+  /// Maps liveness intervals for each slot.
+  SmallVector<LiveInterval*, 16> Intervals;
+  /// VNInfo is used for the construction of LiveIntervals.
+  VNInfo::Allocator VNInfoAllocator;
+  /// SlotIndex analysis object.
+  SlotIndexes *Indexes;
+
+  /// The list of lifetime markers found. These markers are to be removed
+  /// once the coloring is done.
+  SmallVector<MachineInstr*, 8> Markers;
+
+  /// SlotSizeSorter - A Sort utility for arranging stack slots according
+  /// to their size.
+  struct SlotSizeSorter {
+    MachineFrameInfo *MFI;
+    SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
+    bool operator()(int LHS, int RHS) {
+      // We use -1 to denote an uninteresting slot. Place these slots at the end.
+      if (LHS == -1) return false;
+      if (RHS == -1) return true;
+      // Sort according to size.
+      return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+    }
+};
+
+public:
+  static char ID;
+  StackColoring() : MachineFunctionPass(ID) {
+    initializeStackColoringPass(*PassRegistry::getPassRegistry());
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+  bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+  /// Debug.
+  void dump();
+
+  /// Removes all of the lifetime marker instructions from the function.
+  /// \returns true if any markers were removed.
+  bool removeAllMarkers();
+
+  /// Scan the machine function and find all of the lifetime markers.
+  /// Record the findings in the BEGIN and END vectors.
+  /// \returns the number of markers found.
+  unsigned collectMarkers(unsigned NumSlot);
+
+  /// Perform the dataflow calculation and calculate the lifetime for each of
+  /// the slots, based on the BEGIN/END vectors.
+  /// Set the LifetimeLIVE_IN and LifetimeLIVE_OUT maps that represent which
+  /// stack slots are live coming in and out of blocks.
+  void calculateLocalLiveness();
+
+  /// Construct the LiveIntervals for the slots.
+  void calculateLiveIntervals(unsigned NumSlots);
+
+  /// Go over the machine function and change instructions which use stack
+  /// slots to use the joint slots.
+  void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+  /// The input program may contain instructions which are not inside lifetime
+  /// markers. This can happen due to a bug in the compiler or due to a bug in
+  /// user code (for example, returning a reference to a local variable).
+  /// This procedure checks all of the instructions in the function and
+  /// invalidates lifetime ranges which do not contain all of the instructions
+  /// which access that frame slot.
+  void removeInvalidSlotRanges();
+
+  /// Map entries which point to other entries to their destination.
+  /// A->B->C becomes A->C.
+  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+                   "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring,
+                   "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<SlotIndexes>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void StackColoring::dump() {
+  for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+       FI != FE; ++FI) {
+    unsigned Num = BasicBlocks[*FI];
+    DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<(*FI)->getName()<<"]\n");
+    Num = 0;
+    DEBUG(dbgs()<<"BEGIN  : {");
+    for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"END    : {");
+    for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"LIVE_IN: {");
+    for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"LIVEOUT: {");
+    for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
+      DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+  }
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+  unsigned MarkersFound = 0;
+  for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+       FI != FE; ++FI) {
+
+    // Assign a serial number to this basic block.
+    BasicBlocks[*FI] = BasicBlockNumbering.size();
+    BasicBlockNumbering.push_back(*FI);
+
+    BlockLiveness[*FI].Begin.resize(NumSlot);
+    BlockLiveness[*FI].End.resize(NumSlot);
+
+    for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
+         BI != BE; ++BI) {
+
+      if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
+          BI->getOpcode() != TargetOpcode::LIFETIME_END)
+        continue;
+
+      Markers.push_back(BI);
+
+      bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
+      MachineOperand &MI = BI->getOperand(0);
+      unsigned Slot = MI.getIndex();
+
+      MarkersFound++;
+
+      const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+      if (Allocation) {
+        DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+              " with allocation: "<<Allocation->getName()<<"\n");
+      }
+
+      if (IsStart) {
+        BlockLiveness[*FI].Begin.set(Slot);
+      } else {
+        if (BlockLiveness[*FI].Begin.test(Slot)) {
+          // Allocas that start and end within a single block are handled
+          // specially when computing the LiveIntervals to avoid pessimizing
+          // the liveness propagation.
+          BlockLiveness[*FI].Begin.reset(Slot);
+        } else {
+          BlockLiveness[*FI].End.set(Slot);
+        }
+      }
+    }
+  }
+
+  // Update statistics.
+  NumMarkerSeen += MarkersFound;
+  return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+  // Perform a standard reverse dataflow computation to solve for
+  // global liveness. The BEGIN set here is equivalent to KILL in the standard
+  // formulation, and END is equivalent to GEN. The result of this computation
+  // is a map from blocks to bitvectors where the bitvectors represent which
+  // allocas are live in/out of that block.
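// In dataflow terms, the loop below iterates, per block B and per slot bit:
//   LiveIn(B)  = (union of LiveOut(P) over preds P)  | End(B),   minus Begin(B)
//   LiveOut(B) = (union of LiveIn(S) over succs S)   | Begin(B), minus End(B)
// until nothing changes. A compressed sketch of that fixed point on plain
// BitVectors (worklist handling and the same-block BEGIN/END special case
// omitted; the helper name is illustrative, not from the pass):
//
//   bool Changed = true;
//   while (Changed) {
//     Changed = false;
//     for (MachineBasicBlock *BB : Blocks) {
//       BitVector In = unionOfPredLiveOut(BB);
//       In |= End[BB];
//       In.reset(Begin[BB]);
//       if (In.test(LiveIn[BB])) {      // any bits not yet recorded?
//         LiveIn[BB] |= In;
//         Changed = true;
//       }
//       // ...and symmetrically for LiveOut via the successors.
//     }
//   }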
+  SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+                                           BasicBlockNumbering.end());
+  unsigned NumSSMIters = 0;
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    ++NumSSMIters;
+
+    SmallPtrSet<MachineBasicBlock*, 8> NextBBSet;
+
+    for (SmallVector<MachineBasicBlock*, 8>::iterator
+         PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+         PI != PE; ++PI) {
+
+      MachineBasicBlock *BB = *PI;
+      if (!BBSet.count(BB)) continue;
+
+      BitVector LocalLiveIn;
+      BitVector LocalLiveOut;
+
+      // Forward propagation from begins to ends.
+      for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+           PE = BB->pred_end(); PI != PE; ++PI)
+        LocalLiveIn |= BlockLiveness[*PI].LiveOut;
+      LocalLiveIn |= BlockLiveness[BB].End;
+      LocalLiveIn.reset(BlockLiveness[BB].Begin);
+
+      // Reverse propagation from ends to begins.
+      for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI)
+        LocalLiveOut |= BlockLiveness[*SI].LiveIn;
+      LocalLiveOut |= BlockLiveness[BB].Begin;
+      LocalLiveOut.reset(BlockLiveness[BB].End);
+
+      LocalLiveIn |= LocalLiveOut;
+      LocalLiveOut |= LocalLiveIn;
+
+      // After adopting the live bits, we need to turn off the bits which
+      // are de-activated in this block.
+      LocalLiveOut.reset(BlockLiveness[BB].End);
+      LocalLiveIn.reset(BlockLiveness[BB].Begin);
+
+      // If we have both BEGIN and END markers in the same basic block then
+      // we know that the BEGIN marker comes after the END, because we already
+      // handle the case where the BEGIN comes before the END when collecting
+      // the markers (and building the BEGIN/END vectors).
+      // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
+      // BEGIN and END because it means that the value lives before and after
+      // this basic block.
+      BitVector LocalEndBegin = BlockLiveness[BB].End;
+      LocalEndBegin &= BlockLiveness[BB].Begin;
+      LocalLiveIn |= LocalEndBegin;
+      LocalLiveOut |= LocalEndBegin;
+
+      if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
+        changed = true;
+        BlockLiveness[BB].LiveIn |= LocalLiveIn;
+
+        for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+             PE = BB->pred_end(); PI != PE; ++PI)
+          NextBBSet.insert(*PI);
+      }
+
+      if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
+        changed = true;
+        BlockLiveness[BB].LiveOut |= LocalLiveOut;
+
+        for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+             SE = BB->succ_end(); SI != SE; ++SI)
+          NextBBSet.insert(*SI);
+      }
+    }
+
+    BBSet = NextBBSet;
+  }// while changed.
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+  SmallVector<SlotIndex, 16> Starts;
+  SmallVector<SlotIndex, 16> Finishes;
+
+  // For each block, find which slots are active within this block
+  // and update the live intervals.
+  for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
+       MBB != MBBe; ++MBB) {
+    Starts.clear();
+    Starts.resize(NumSlots);
+    Finishes.clear();
+    Finishes.resize(NumSlots);
+
+    // Create the interval for the basic blocks with lifetime markers in them.
+    for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(),
+         e = Markers.end(); it != e; ++it) {
+      MachineInstr *MI = *it;
+      if (MI->getParent() != MBB)
+        continue;
+
+      assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
+              MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
+             "Invalid Lifetime marker");
+
+      bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
+      MachineOperand &Mo = MI->getOperand(0);
+      int Slot = Mo.getIndex();
+      assert(Slot >= 0 && "Invalid slot");
+
+      SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+
+      if (IsStart) {
+        if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+          Starts[Slot] = ThisIndex;
+      } else {
+        if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+          Finishes[Slot] = ThisIndex;
+      }
+    }
+
+    // Create the interval of the blocks that we previously found to be 'alive'.
+    BitVector Alive = BlockLiveness[MBB].LiveIn;
+    Alive |= BlockLiveness[MBB].LiveOut;
+
+    if (Alive.any()) {
+      for (int pos = Alive.find_first(); pos != -1;
+           pos = Alive.find_next(pos)) {
+        if (!Starts[pos].isValid())
+          Starts[pos] = Indexes->getMBBStartIdx(MBB);
+        if (!Finishes[pos].isValid())
+          Finishes[pos] = Indexes->getMBBEndIdx(MBB);
+      }
+    }
+
+    for (unsigned i = 0; i < NumSlots; ++i) {
+      assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+      if (!Starts[i].isValid())
+        continue;
+
+      assert(Starts[i] && Finishes[i] && "Invalid interval");
+      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+      SlotIndex S = Starts[i];
+      SlotIndex F = Finishes[i];
+      if (S < F) {
+        // We have a single consecutive region.
+        Intervals[i]->addRange(LiveRange(S, F, ValNum));
+      } else {
+        // We have two non-consecutive regions. This happens when
+        // LIFETIME_START appears after the LIFETIME_END marker.
+        SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
+        SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
+        Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
+        Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+      }
+    }
+  }
+}
+
+bool StackColoring::removeAllMarkers() {
+  unsigned Count = 0;
+  for (unsigned i = 0; i < Markers.size(); ++i) {
+    Markers[i]->eraseFromParent();
+    Count++;
+  }
+  Markers.clear();
+
+  DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+  return Count;
+}
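Before the rewrite in remapInstructions (next) runs, runOnMachineFunction first flattens the remap table with expungeSlotMap (defined further below) so that chains of merged slots resolve in a single step. A self-contained sketch of that path compression, with std::map standing in for the pass's DenseMap:

#include <map>

// Collapse chains: given {2->1, 3->2}, slot 3 must end up mapped to 1.
static void flattenRemap(std::map<int, int> &Remap) {
  for (std::map<int, int>::iterator I = Remap.begin(), E = Remap.end();
       I != E; ++I) {
    int Target = I->second;
    while (Remap.count(Target))      // follow the chain; assumes no cycles
      Target = Remap[Target];
    I->second = Target;
  }
}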
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+  unsigned FixedInstr = 0;
+  unsigned FixedMemOp = 0;
+  unsigned FixedDbg = 0;
+  MachineModuleInfo *MMI = &MF->getMMI();
+
+  // Remap debug information that refers to stack slots.
+  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+       VE = VMap.end(); VI != VE; ++VI) {
+    const MDNode *Var = VI->first;
+    if (!Var) continue;
+    std::pair<unsigned, DebugLoc> &VP = VI->second;
+    if (SlotRemap.count(VP.first)) {
+      DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+      VP.first = SlotRemap[VP.first];
+      FixedDbg++;
+    }
+  }
+
+  // Keep a list of *allocas* which need to be remapped.
+  DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+  for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
+       e = SlotRemap.end(); it != e; ++it) {
+    const AllocaInst *From = MFI->getObjectAllocation(it->first);
+    const AllocaInst *To = MFI->getObjectAllocation(it->second);
+    assert(To && From && "Invalid allocation object");
+    Allocas[From] = To;
+  }
+
+  // Remap all instructions to the new stack slots.
+  MachineFunction::iterator BB, BBE;
+  MachineBasicBlock::iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      // Skip lifetime markers. We'll remove them soon.
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END)
+        continue;
+
+      // Update the MachineMemOperand to use the new alloca.
+      for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+           E = I->memoperands_end(); MM != E; ++MM) {
+        MachineMemOperand *MMO = *MM;
+
+        const Value *V = MMO->getValue();
+
+        if (!V)
+          continue;
+
+        // Climb up and find the original alloca.
+        V = GetUnderlyingObject(V);
+        // If we did not find one, or if the one that we found is not in our
+        // map, then move on.
+        if (!V || !isa<AllocaInst>(V)) {
+          // Clear mem operand since we don't know for sure that it doesn't
+          // alias a merged alloca.
+          MMO->setValue(0);
+          continue;
+        }
+        const AllocaInst *AI = cast<AllocaInst>(V);
+        if (!Allocas.count(AI))
+          continue;
+
+        MMO->setValue(Allocas[AI]);
+        FixedMemOp++;
+      }
+
+      // Update all of the machine instruction operands.
+      for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+        MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+        int FromSlot = MO.getIndex();
+
+        // Don't touch arguments.
+        if (FromSlot<0)
+          continue;
+
+        // Only look at mapped slots.
+        if (!SlotRemap.count(FromSlot))
+          continue;
+
+        // In a debug build, check that the instruction that we are modifying is
+        // inside the expected live range. If the instruction is not inside
+        // the calculated range then it means that the alloca usage moved
+        // outside of the lifetime markers, or that the user has a bug.
+        // NOTE: Alloca address calculations which happen outside the lifetime
+        // zone are okay, despite the fact that we don't have a good way
+        // for validating all of the usages of the calculation.
+#ifndef NDEBUG
+        bool TouchesMemory = I->mayLoad() || I->mayStore();
+        // If we *don't* protect the user from escaped allocas, don't bother
+        // validating the instructions.
+        if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+          SlotIndex Index = Indexes->getInstructionIndex(I);
+          LiveInterval *Interval = Intervals[FromSlot];
+          assert(Interval->find(Index) != Interval->end() &&
+                 "Found instruction usage outside of live range.");
+        }
+#endif
+
+        // Fix the machine instructions.
+        int ToSlot = SlotRemap[FromSlot];
+        MO.setIndex(ToSlot);
+        FixedInstr++;
+      }
+    }
+
+  DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+  DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+  DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+  MachineFunction::iterator BB, BBE;
+  MachineBasicBlock::iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+        continue;
+
+      // Some intervals are suspicious! In some cases we find address
+      // calculations outside of the lifetime zone, but not actual memory
+      // read or write. Memory accesses outside of the lifetime zone are a clear
+      // violation, but address calculations are okay. This can happen when
+      // GEPs are hoisted outside of the lifetime zone.
+      // So, in here we only check instructions which can read or write memory.
+      if (!I->mayLoad() && !I->mayStore())
+        continue;
+
+      // Check all of the machine operands.
+      for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+        MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+
+        int Slot = MO.getIndex();
+
+        if (Slot<0)
+          continue;
+
+        if (Intervals[Slot]->empty())
+          continue;
+
+        // Check that the used slot is inside the calculated lifetime range.
+        // If it is not, warn about it and invalidate the range.
+        LiveInterval *Interval = Intervals[Slot];
+        SlotIndex Index = Indexes->getInstructionIndex(I);
+        if (Interval->find(Index) == Interval->end()) {
+          Intervals[Slot]->clear();
+          DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+          EscapedAllocas++;
+        }
+      }
+    }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+                                   unsigned NumSlots) {
+  // Expunge slot remap map.
+  for (unsigned i=0; i < NumSlots; ++i) {
+    // If we are remapping i
+    if (SlotRemap.count(i)) {
+      int Target = SlotRemap[i];
+      // As long as our target is mapped to something else, follow it.
+      while (SlotRemap.count(Target)) {
+        Target = SlotRemap[Target];
+        SlotRemap[i] = Target;
+      }
+    }
+  }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+  DEBUG(dbgs() << "********** Stack Coloring **********\n"
+               << "********** Function: "
+               << ((const Value*)Func.getFunction())->getName() << '\n');
+  MF = &Func;
+  MFI = MF->getFrameInfo();
+  Indexes = &getAnalysis<SlotIndexes>();
+  BlockLiveness.clear();
+  BasicBlocks.clear();
+  BasicBlockNumbering.clear();
+  Markers.clear();
+  Intervals.clear();
+  VNInfoAllocator.Reset();
+
+  unsigned NumSlots = MFI->getObjectIndexEnd();
+
+  // If there are no stack slots then there are no markers to remove.
+  if (!NumSlots)
+    return false;
+
+  SmallVector<int, 8> SortedSlots;
+
+  SortedSlots.reserve(NumSlots);
+  Intervals.reserve(NumSlots);
+
+  unsigned NumMarkers = collectMarkers(NumSlots);
+
+  unsigned TotalSize = 0;
+  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+
+  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+    DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+    TotalSize += MFI->getObjectSize(i);
+  }
+
+  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+  // Don't continue because there are not enough lifetime markers, or the
+  // stack is too small, or we are told not to optimize the slots.
+  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+    DEBUG(dbgs()<<"Will not try to merge slots.\n");
+    return removeAllMarkers();
+  }
+
+  for (unsigned i=0; i < NumSlots; ++i) {
+    LiveInterval *LI = new LiveInterval(i, 0);
+    Intervals.push_back(LI);
+    LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+    SortedSlots.push_back(i);
+  }
+
+  // Calculate the liveness of each block.
+  calculateLocalLiveness();
+
+  // Propagate the liveness information.
+  calculateLiveIntervals(NumSlots);
+
+  // Search for allocas which are used outside of the declared lifetime
+  // markers.
+  if (ProtectFromEscapedAllocas)
+    removeInvalidSlotRanges();
+
+  // Maps old slots to new slots.
+  DenseMap<int, int> SlotRemap;
+  unsigned RemovedSlots = 0;
+  unsigned ReducedSize = 0;
+
+  // Do not bother looking at empty intervals.
+  for (unsigned I = 0; I < NumSlots; ++I) {
+    if (Intervals[SortedSlots[I]]->empty())
+      SortedSlots[I] = -1;
+  }
+
+  // This is a simple greedy algorithm for merging allocas. First, sort the
+  // slots, placing the largest slots first. Next, perform an n^2 scan and look
+  // for disjoint slots. When you find disjoint slots, merge the smaller one
+  // into the bigger one and update the live interval. Remove the small alloca
+  // and continue.
+
+  // Sort the slots according to their size. Place unused slots at the end.
+  std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
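The merge loop that follows implements the greedy scan described above. As a self-contained toy model (STL containers instead of LiveInterval; a 64-slot time bitmask makes "disjoint" a simple AND test, and a single pass suffices because each absorbing slot's live set is updated in place):

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

struct ToySlot { int Id; unsigned Size; uint64_t Live; }; // bit i = live at i

static void mergeToySlots(std::vector<ToySlot> &Slots,
                          std::map<int, int> &Remap) {
  // Biggest first, mirroring SlotSizeSorter.
  std::sort(Slots.begin(), Slots.end(),
            [](const ToySlot &A, const ToySlot &B) { return A.Size > B.Size; });
  for (unsigned I = 0; I < Slots.size(); ++I) {
    if (Slots[I].Id < 0) continue;                   // already folded away
    for (unsigned J = I + 1; J < Slots.size(); ++J) {
      if (Slots[J].Id < 0 || (Slots[I].Live & Slots[J].Live))
        continue;                                    // dead or overlapping
      Slots[I].Live |= Slots[J].Live;                // union the live ranges
      Remap[Slots[J].Id] = Slots[I].Id;              // smaller reuses bigger
      Slots[J].Id = -1;
    }
  }
}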
+  bool Changed = true;
+  while (Changed) {
+    Changed = false;
+    for (unsigned I = 0; I < NumSlots; ++I) {
+      if (SortedSlots[I] == -1)
+        continue;
+
+      for (unsigned J=I+1; J < NumSlots; ++J) {
+        if (SortedSlots[J] == -1)
+          continue;
+
+        int FirstSlot = SortedSlots[I];
+        int SecondSlot = SortedSlots[J];
+        LiveInterval *First = Intervals[FirstSlot];
+        LiveInterval *Second = Intervals[SecondSlot];
+        assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+        // Merge disjoint slots.
+        if (!First->overlaps(*Second)) {
+          Changed = true;
+          First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+          SlotRemap[SecondSlot] = FirstSlot;
+          SortedSlots[J] = -1;
+          DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+                SecondSlot<<" together.\n");
+          unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+                                           MFI->getObjectAlignment(SecondSlot));
+
+          assert(MFI->getObjectSize(FirstSlot) >=
+                 MFI->getObjectSize(SecondSlot) &&
+                 "Merging a small object into a larger one");
+
+          RemovedSlots+=1;
+          ReducedSize += MFI->getObjectSize(SecondSlot);
+          MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+          MFI->RemoveStackObject(SecondSlot);
+        }
+      }
+    }
+  }// While changed.
+
+  // Record statistics.
+  StackSpaceSaved += ReducedSize;
+  StackSlotMerged += RemovedSlots;
+  DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
+        ReducedSize<<" bytes\n");
+
+  // Scan the entire function and update all machine operands that use frame
+  // indices to use the remapped frame index.
+  expungeSlotMap(SlotRemap, NumSlots);
+  remapInstructions(SlotRemap);
+
+  // Release the intervals.
+  for (unsigned I = 0; I < NumSlots; ++I) {
+    delete Intervals[I];
+  }
+
+  return removeAllMarkers();
+}
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
-              cl::desc("Lower bound for a buffer to be considered for "
-                       "stack protection"));
-
 namespace {
   class StackProtector : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -61,6 +55,11 @@ namespace {
     /// check fails.
     BasicBlock *CreateFailBB();
 
+    /// ContainsProtectableArray - Check whether the type either is an array or
+    /// contains an array of sufficient size so that we need stack protectors
+    /// for it.
+    bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+
     /// RequiresStackProtector - Check whether or not this function needs a
     /// stack protector based upon the stack protector level.
     bool RequiresStackProtector() const;
@@ -100,21 +99,50 @@ bool StackProtector::runOnFunction(Function &Fn) {
   return InsertStackProtectors();
 }
 
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+  if (!Ty) return false;
+  if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    const TargetMachine &TM = TLI->getTargetMachine();
+    if (!AT->getElementType()->isIntegerTy(8)) {
+      Triple Trip(TM.getTargetTriple());
+
+      // If we're on a non-Darwin platform or we're inside of a structure, don't
+      // add stack protectors unless the array is a character array.
+      if (InStruct || !Trip.isOSDarwin())
+        return false;
+    }
+
+    // If an array has more than SSPBufferSize bytes of allocated space, then we
+    // emit stack protectors.
+    if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+      return true;
+  }
+
+  const StructType *ST = dyn_cast<StructType>(Ty);
+  if (!ST) return false;
+
+  for (StructType::element_iterator I = ST->element_begin(),
+       E = ST->element_end(); I != E; ++I)
+    if (ContainsProtectableArray(*I, true))
+      return true;
+
+  return false;
+}
+
 /// RequiresStackProtector - Check whether or not this function needs a stack
 /// protector based upon the stack protector level. The heuristic we use is to
 /// add a guard variable to functions that call alloca, and functions with
 /// buffers larger than SSPBufferSize bytes.
bool StackProtector::RequiresStackProtector() const { - if (F->hasFnAttr(Attribute::StackProtectReq)) + if (F->getFnAttributes().hasAttribute(Attributes::StackProtectReq)) return true; - if (!F->hasFnAttr(Attribute::StackProtect)) + if (!F->getFnAttributes().hasAttribute(Attributes::StackProtect)) return false; - const TargetData *TD = TLI->getTargetData(); - const TargetMachine &TM = TLI->getTargetMachine(); - Triple Trip(TM.getTargetTriple()); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -126,17 +154,8 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast(AI->getAllocatedType())) { - // If we're on a non-Darwin platform, don't add stack protectors - // unless the array is a character array. - if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8)) - continue; - - // If an array has more than SSPBufferSize bytes of allocated space, - // then we emit stack protectors. - if (SSPBufferSize <= TD->getTypeAllocSize(AT)) - return true; - } + if (ContainsProtectableArray(AI->getAllocatedType())) + return true; } } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 20da36e..d349abc 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -11,8 +11,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackcoloring" -#include "llvm/Function.h" +#define DEBUG_TYPE "stackslotcoloring" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -391,8 +390,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'; + << "********** Function: " << MF.getName() << '\n'; }); MFI = MF.getFrameInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 5b06195..39fd600 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -404,9 +404,9 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { } void StrongPHIElimination::addReg(unsigned Reg) { - if (RegNodeMap.count(Reg)) - return; - RegNodeMap[Reg] = new (Allocator) Node(Reg); + Node *&N = RegNodeMap[Reg]; + if (!N) + N = new (Allocator) Node(Reg); } StrongPHIElimination::Node* @@ -714,8 +714,9 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, assert(getRegColor(CopyReg) == CopyReg); } - if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) - InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; + // Insert into map if not already there. + InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), + CopyInstr)); } SrcMO.setReg(CopyReg); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index a813fa6..1497d1b 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -552,7 +552,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + MF.getFunction()->getFnAttributes(). 
+ hasAttribute(Attributes::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index ddee6b2..4439192 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -99,17 +99,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, if (NewMI) { // Create a new instruction. - bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false; MachineFunction &MF = *MI->getParent()->getParent(); - if (HasDef) - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); - else - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); + MI = MF.CloneMachineInstr(MI); } if (HasDef) { @@ -572,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, /// Return the default expected latency for a def based on it's opcode. unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { + if (DefMI->isTransient()) + return 0; if (DefMI->mayLoad()) return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) @@ -615,13 +608,13 @@ getOperandLatency(const InstrItineraryData *ItinData, /// If we can determine the operand latency from the def only, without itinerary /// lookup, do so. Otherwise return -1. -static int computeDefOperandLatency( - const TargetInstrInfo *TII, const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) { +int TargetInstrInfo::computeDefOperandLatency( + const InstrItineraryData *ItinData, + const MachineInstr *DefMI, bool FindMin) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) - return TII->getInstrLatency(ItinData, DefMI); + return getInstrLatency(ItinData, DefMI); // Return a latency based on the itinerary properties and defining instruction // if possible. Some common subtargets don't require per-operand latency, @@ -630,7 +623,7 @@ static int computeDefOperandLatency( // If MinLatency is valid, call getInstrLatency. This uses Stage latency if // it exists before defaulting to MinLatency. if (ItinData->SchedModel->MinLatency >= 0) - return TII->getInstrLatency(ItinData, DefMI); + return getInstrLatency(ItinData, DefMI); // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. // For empty itineraries, short-cirtuit the check and default to one cycle. @@ -638,29 +631,42 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData->SchedModel, DefMI); + return defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required return -1; } /// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. +/// dependent def and use when the operand indices are already known. UseMI may +/// be NULL for an unknown use. +/// +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. /// -/// FindMin may be set to get the minimum vs. expected latency. 
+/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx, bool FindMin) const { - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); + int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); if (DefLatency >= 0) return DefLatency; assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } if (OperLatency >= 0) return OperLatency; @@ -673,77 +679,3 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } - -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an -/// unknown use. Depending on the subtarget's itinerary properties, this may or -/// may not need to call getOperandLatency(). -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. -/// -/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency. -/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use. -unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const { - - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - // Find the definition of the register in the defining instruction. - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6, %D7 = VLD1q16 %R2, 0, ..., %Q3 - // %Q1 = VMULv8i16 %Q1, %Q3, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - // For all uses of the register, calculate the maxmimum latency - int OperLatency = -1; - - // UseMI is null, then it must be a scheduling barrier. 
- if (!UseMI) { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - else { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i); - OperLatency = std::max(OperLatency, UseCycle); - } - } - // If we found an operand latency, we're done. - if (OperLatency >= 0) - return OperLatency; - } - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 2a2fa9e..8f5d770 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -27,7 +27,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Dwarf.h" @@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getTargetData()->getPointerSize(); + unsigned Size = TM.getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -247,7 +247,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: this is getting the alignment of the character, not the // alignment of the global! unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast(GV)); + TM.getDataLayout()->getPreferredAlignment(cast(GV)); const char *SizeSpec = ".rodata.str1."; if (Kind.isMergeable2ByteCString()) @@ -522,14 +522,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getTargetData()->getPreferredAlignment(cast(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getTargetData()->getPreferredAlignment(cast(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast(GV)) < 32) return UStringSection; if (Kind.isMergeableConst()) { diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp new file mode 100644 index 0000000..ca3b0e0 --- /dev/null +++ b/lib/CodeGen/TargetSchedule.cpp @@ -0,0 +1,306 @@ +//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a wrapper around MCSchedModel that allows the interface +// to benefit from information currently only available in TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), + cl::desc("Use TargetSchedModel for latency lookup")); + +static cl::opt EnableSchedItins("scheditins", cl::Hidden, cl::init(true), + cl::desc("Use InstrItineraryData for latency lookup")); + +bool TargetSchedModel::hasInstrSchedModel() const { + return EnableSchedModel && SchedModel.hasInstrSchedModel(); +} + +bool TargetSchedModel::hasInstrItineraries() const { + return EnableSchedItins && !InstrItins.isEmpty(); +} + +static unsigned gcd(unsigned Dividend, unsigned Divisor) { + // Dividend and Divisor will be naturally swapped as needed. + while(Divisor) { + unsigned Rem = Dividend % Divisor; + Dividend = Divisor; + Divisor = Rem; + }; + return Dividend; +} +static unsigned lcm(unsigned A, unsigned B) { + unsigned LCM = (uint64_t(A) * B) / gcd(A, B); + assert((LCM >= A && LCM >= B) && "LCM overflow"); + return LCM; +} + +void TargetSchedModel::init(const MCSchedModel &sm, + const TargetSubtargetInfo *sti, + const TargetInstrInfo *tii) { + SchedModel = sm; + STI = sti; + TII = tii; + STI->initInstrItins(InstrItins); + + unsigned NumRes = SchedModel.getNumProcResourceKinds(); + ResourceFactors.resize(NumRes); + ResourceLCM = SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + if (NumUnits > 0) + ResourceLCM = lcm(ResourceLCM, NumUnits); + } + MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; + } +} + +unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, + const MCSchedClassDesc *SC) const { + if (hasInstrItineraries()) { + int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); + } + if (hasInstrSchedModel()) { + if (!SC) + SC = resolveSchedClass(MI); + if (SC->isValid()) + return SC->NumMicroOps; + } + return MI->isTransient() ? 0 : 1; +} + +// The machine model may explicitly specify an invalid latency, which +// effectively means infinite latency. Since users of the TargetSchedule API +// don't know how to handle this, we convert it to a very large latency that is +// easy to distinguish when debugging the DAG but won't induce overflow. +static unsigned convertLatency(int Cycles) { + return Cycles >= 0 ? Cycles : 1000; +} + +/// If we can determine the operand latency from the def only, without machine +/// model or itinerary lookup, do so. Otherwise return -1. +int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, + bool FindMin) const { + + // Return a latency based on the itinerary properties and defining instruction + // if possible. 
Some common subtargets don't require per-operand latency, + // especially for minimum latencies. + if (FindMin) { + // If MinLatency is invalid, then use the itinerary for MinLatency. If no + // itinerary exists either, then use single cycle latency. + if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { + return 1; + } + return SchedModel.MinLatency; + } + else if (!hasInstrSchedModel() && !hasInstrItineraries()) { + return TII->defaultDefLatency(&SchedModel, DefMI); + } + // ...operand lookup required + return -1; +} + +/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require +/// evaluation of predicates that depend on instruction operands or flags. +const MCSchedClassDesc *TargetSchedModel:: +resolveSchedClass(const MachineInstr *MI) const { + + // Get the definition's scheduling class descriptor from this machine model. + unsigned SchedClass = MI->getDesc().getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + +#ifndef NDEBUG + unsigned NIter = 0; +#endif + while (SCDesc->isVariant()) { + assert(++NIter < 6 && "Variants are nested deeper than the magic number"); + + SchedClass = STI->resolveSchedClass(SchedClass, MI, this); + SCDesc = SchedModel.getSchedClassDesc(SchedClass); + } + return SCDesc; +} + +/// Find the def index of this operand. This index maps to the machine model and +/// is independent of use operands. Def operands may be reordered with uses or +/// merged with uses without affecting the def index (e.g. before/after +/// regalloc). However, an instruction's def operands must never be reordered +/// with respect to each other. +static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { + unsigned DefIdx = 0; + for (unsigned i = 0; i != DefOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + ++DefIdx; + } + return DefIdx; +} + +/// Find the use index of this operand. This is independent of the instruction's +/// def operands. +/// +/// Note that uses are not determined by the operand's isUse property, which +/// is simply the inverse of isDef. Here we consider any readsReg operand to be +/// a "use". The machine model allows an operand to be both a Def and Use. +static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { + unsigned UseIdx = 0; + for (unsigned i = 0; i != UseOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.readsReg()) + ++UseIdx; + } + return UseIdx; +} + +// Top-level API for clients that know the operand indices. +unsigned TargetSchedModel::computeOperandLatency( + const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *UseMI, unsigned UseOperIdx, + bool FindMin) const { + + int DefLatency = getDefLatency(DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + if (hasInstrItineraries()) { + int OperLatency = 0; + if (UseMI) { + OperLatency = + TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + } + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + // Rather than directly querying InstrItins stage latency, we call a TII + // hook to allow subtargets to specialize latency. 
This hook is only + // applicable to the InstrItins model. InstrSchedModel should model all + // special cases without TII hooks. + if (!FindMin) + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); + return InstrLatency; + } + assert(!FindMin && hasInstrSchedModel() && + "Expected a SchedModel for this cpu"); + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); + if (DefIdx < SCDesc->NumWriteLatencyEntries) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + unsigned WriteID = WLEntry->WriteResourceID; + unsigned Latency = convertLatency(WLEntry->Cycles); + if (!UseMI) + return Latency; + + // Lookup the use's latency adjustment in SubtargetInfo. + const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); + if (UseDesc->NumReadAdvanceEntries == 0) + return Latency; + unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); + return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + } + // If DefIdx does not exist in the model (e.g. implicit defs), then return + // unit latency (defaultDefLatency may be too conservative). +#ifndef NDEBUG + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { + std::string Err; + raw_string_ostream ss(Err); + ss << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + report_fatal_error(ss.str()); + } +#endif + return DefMI->isTransient() ? 0 : 1; +} + +unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { + // For the itinerary model, fall back to the old subtarget hook. + // Allow subtargets to compute Bundle latencies outside the machine model. + if (hasInstrItineraries() || MI->isBundle()) + return TII->getInstrLatency(&InstrItins, MI); + + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); + if (SCDesc->isValid()) { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); + } + return Latency; + } + } + return TII->defaultDefLatency(&SchedModel, MI); +} + +unsigned TargetSchedModel:: +computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *DepMI) const { + // MinLatency == -1 is for in-order processors that always have unit + // MinLatency. MinLatency > 0 is for in-order processors with varying min + // latencies, but since this is not a RAW dep, we always use unit latency. + if (SchedModel.MinLatency != 0) + return 1; + + // MinLatency == 0 indicates an out-of-order processor that can dispatch + // WAW dependencies in the same cycle. + + // Treat predication as a data dependency for out-of-order cpus. In-order + // cpus do not need to treat predicated writes specially. + // + // TODO: The following hack exists because predication passes do not + // correctly append imp-use operands, and readsReg() strangely returns false + // for predicated defs. 
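As an aside on the scheduling-model path above: the effective def-to-use latency is the write latency of the producing resource minus any read advance granted to the consumer by a bypass network. A minimal, self-contained sketch of that combination (the helper name and the clamp-at-zero behavior are illustrative assumptions, not part of this patch; the code in computeOperandLatency() subtracts without clamping):

// Sketch only: how a write latency and a read advance combine.
static unsigned operandLatencySketch(unsigned WriteCycles,
                                     unsigned ReadAdvance) {
  // A bypass lets the consumer issue ReadAdvance cycles early, so the
  // visible def->use distance shrinks; never report a negative latency.
  return WriteCycles > ReadAdvance ? WriteCycles - ReadAdvance : 0;
}

The predication workaround described in the TODO above follows immediately below.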
+ unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); + const MachineFunction &MF = *DefMI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) + return computeInstrLatency(DefMI); + + // If we have a per operand scheduling model, check if this def is writing + // an unbuffered resource. If so, it treated like an in-order cpu. + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + if (SCDesc->isValid()) { + for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), + *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) + return 1; + } + } + } + return 0; +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index aa601af..a9058bc 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -60,116 +60,108 @@ STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { - class TwoAddressInstructionPass : public MachineFunctionPass { - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; - MachineRegisterInfo *MRI; - LiveVariables *LV; - SlotIndexes *Indexes; - LiveIntervals *LIS; - AliasAnalysis *AA; - CodeGenOpt::Level OptLevel; - - // DistanceMap - Keep track the distance of a MI from the start of the - // current basic block. - DenseMap DistanceMap; - - // SrcRegMap - A map from virtual registers to physical registers which - // are likely targets to be coalesced to due to copies from physical - // registers to virtual registers. e.g. v1024 = move r0. - DenseMap SrcRegMap; - - // DstRegMap - A map from virtual registers to physical registers which - // are likely targets to be coalesced to due to copies to physical - // registers from virtual registers. e.g. r1 = move v1024. - DenseMap DstRegMap; - - /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen - /// during the initial walk of the machine function. - SmallVector RegSequences; - - bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI, - unsigned Reg, - MachineBasicBlock::iterator OldPos); - - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, - unsigned &LastDef); - - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, - MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Dist); +class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + MachineRegisterInfo *MRI; + LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; + AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; + + // The current basic block being processed. + MachineBasicBlock *MBB; + + // DistanceMap - Keep track the distance of a MI from the start of the + // current basic block. + DenseMap DistanceMap; + + // Set of already processed instructions in the current block. 
+ SmallPtrSet Processed; - bool CommuteInstruction(MachineBasicBlock::iterator &mi, - MachineFunction::iterator &mbbi, - unsigned RegB, unsigned RegC, unsigned Dist); + // SrcRegMap - A map from virtual registers to physical registers which are + // likely targets to be coalesced to due to copies from physical registers to + // virtual registers. e.g. v1024 = move r0. + DenseMap SrcRegMap; - bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); + // DstRegMap - A map from virtual registers to physical registers which are + // likely targets to be coalesced to due to copies to physical registers from + // virtual registers. e.g. r1 = move v1024. + DenseMap DstRegMap; - bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned RegA, unsigned RegB, unsigned Dist); + /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen + /// during the initial walk of the machine function. + SmallVector RegSequences; - bool isDefTooClose(unsigned Reg, unsigned Dist, - MachineInstr *MI, MachineBasicBlock *MBB); + bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, + MachineBasicBlock::iterator OldPos); - bool RescheduleMIBelowKill(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); - bool RescheduleKillAboveMI(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); - bool TryInstructionTransform(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, - SmallPtrSet &Processed); + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist); - void ScanUses(unsigned DstReg, MachineBasicBlock *MBB, - SmallPtrSet &Processed); + bool commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist); - void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet &Processed); + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); - typedef SmallVector, 4> TiedPairList; - typedef SmallDenseMap TiedOperandMap; - bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); - void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned RegA, unsigned RegB, unsigned Dist); - void CoalesceExtSubRegs(SmallVector &Srcs, unsigned DstReg); + bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI); - /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part - /// of the de-ssa process. This replaces sources of REG_SEQUENCE as - /// sub-register references of the register defined by REG_SEQUENCE. 
- bool EliminateRegSequences(); + bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); - public: - static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(ID) { - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + bool tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } + void scanUses(unsigned DstReg); - /// runOnMachineFunction - Pass entry point. - bool runOnMachineFunction(MachineFunction&); - }; -} + void processCopy(MachineInstr *MI); + + typedef SmallVector, 4> TiedPairList; + typedef SmallDenseMap TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + + /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of + /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register + /// references of the register defined by REG_SEQUENCE. + bool eliminateRegSequences(); + +public: + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - Pass entry point. + bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace char TwoAddressInstructionPass::ID = 0; INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", @@ -180,13 +172,13 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; -/// Sink3AddrInstruction - A two-address instruction has been converted to a +/// sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it /// past the instruction that would kill the above mentioned register to reduce /// register pressure. -bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, - MachineInstr *MI, unsigned SavedReg, - MachineBasicBlock::iterator OldPos) { +bool TwoAddressInstructionPass:: +sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, + MachineBasicBlock::iterator OldPos) { // FIXME: Shouldn't we be trying to do this before we three-addressify the // instruction? After this transformation is done, we no longer need // the instruction to be in three-address form. 
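The FIXME above gets at the underlying trade-off. A two-address operation destroys its first source, so when that source is still live the pass must either copy it first or rewrite the operation in a three-address form. A scalar analogy (purely illustrative, not target code):

// Two-address style: the tied operand is overwritten.
int twoAddrAdd(int &b, int c) { b += c; return b; }

// Three-address style: all inputs survive, so no copy is needed.
int threeAddrAdd(int b, int c) { return b + c; }

On a target such as x86 this is the choice between reusing ADD, which clobbers its destination, and materializing the sum with LEA.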
@@ -299,13 +291,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, return true; } -/// NoUseAfterLastDef - Return true if there are no intervening uses between the +/// noUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. It also returns the last /// def location by reference -bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, - MachineBasicBlock *MBB, unsigned Dist, - unsigned &LastDef) { +bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, + unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), @@ -465,10 +456,9 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { /// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool -TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, - unsigned regC, - MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Dist) { +TwoAddressInstructionPass:: +isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -516,13 +506,13 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, // If there is a use of regC between its last def (could be livein) and this // instruction, then bail. unsigned LastDefC = 0; - if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC)) + if (!noUseAfterLastDef(regC, Dist, LastDefC)) return false; // If there is a use of regB between its last def (could be livein) and this // instruction, then go ahead and make this transformation. unsigned LastDefB = 0; - if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB)) + if (!noUseAfterLastDef(regB, Dist, LastDefB)) return true; // Since there are no intervening uses for both registers, then commute @@ -530,13 +520,12 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, return LastDefB && LastDefC && LastDefC > LastDefB; } -/// CommuteInstruction - Commute a two-address instruction and update the basic +/// commuteInstruction - Commute a two-address instruction and update the basic /// block, distance map, and live variables if needed. Return true if it is /// successful. -bool -TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, - MachineFunction::iterator &mbbi, - unsigned RegB, unsigned RegC, unsigned Dist) { +bool TwoAddressInstructionPass:: +commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); @@ -555,8 +544,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (Indexes) Indexes->replaceMachineInstrInMaps(MI, NewMI); - mbbi->insert(mi, NewMI); // Insert the new inst - mbbi->erase(mi); // Nuke the old inst. + MBB->insert(mi, NewMI); // Insert the new inst + MBB->erase(mi); // Nuke the old inst. 
mi = NewMI; DistanceMap.insert(std::make_pair(NewMI, Dist)); } @@ -588,51 +577,51 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } -/// ConvertInstTo3Addr - Convert the specified two-address instruction into a +/// convertInstTo3Addr - Convert the specified two-address instruction into a /// three address one. Return true if this transformation was successful. bool -TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, +TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist) { - MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); - if (NewMI) { - DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); - DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - bool Sunk = false; + // FIXME: Why does convertToThreeAddress() need an iterator reference? + MachineFunction::iterator MFI = MBB; + MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); + assert(MBB == MFI && "convertToThreeAddress changed iterator reference"); + if (!NewMI) + return false; - if (Indexes) - Indexes->replaceMachineInstrInMaps(mi, NewMI); + DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); + bool Sunk = false; - if (NewMI->findRegisterUseOperand(RegB, false, TRI)) - // FIXME: Temporary workaround. If the new instruction doesn't - // uses RegB, convertToThreeAddress must have created more - // then one instruction. - Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi); + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); - mbbi->erase(mi); // Nuke the old inst. + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) + // FIXME: Temporary workaround. If the new instruction doesn't + // uses RegB, convertToThreeAddress must have created more + // then one instruction. + Sunk = sink3AddrInstruction(NewMI, RegB, mi); - if (!Sunk) { - DistanceMap.insert(std::make_pair(NewMI, Dist)); - mi = NewMI; - nmi = llvm::next(mi); - } + MBB->erase(mi); // Nuke the old inst. - // Update source and destination register maps. - SrcRegMap.erase(RegA); - DstRegMap.erase(RegB); - return true; + if (!Sunk) { + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = llvm::next(mi); } - return false; + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); + return true; } -/// ScanUses - Scan forward recursively for only uses, update maps if the use +/// scanUses - Scan forward recursively for only uses, update maps if the use /// is a copy or a two-address instruction. void -TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, - SmallPtrSet &Processed) { +TwoAddressInstructionPass::scanUses(unsigned DstReg) { SmallVector VirtRegPairs; bool IsDstPhys; bool IsCopy = false; @@ -676,7 +665,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, } } -/// ProcessCopy - If the specified instruction is not yet processed, process it +/// processCopy - If the specified instruction is not yet processed, process it /// if it's a copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in /// point-to maps used to determine future optimizations. e.g. 
@@ -688,9 +677,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, /// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is /// potentially joined with r1 on the output side. It's worthwhile to commute /// 'add' to eliminate a copy. -void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, - MachineBasicBlock *MBB, - SmallPtrSet &Processed) { +void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { if (Processed.count(MI)) return; @@ -707,21 +694,20 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, assert(SrcRegMap[DstReg] == SrcReg && "Can't map to two src physical registers!"); - ScanUses(DstReg, MBB, Processed); + scanUses(DstReg); } Processed.insert(MI); return; } -/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// rescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. -bool -TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass:: +rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { // Bail immediately if we don't have LV available. We use it to find kills // efficiently. if (!LV) @@ -853,8 +839,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, /// isDefTooClose - Return true if the re-scheduling will put the given /// instruction too close to the defs of its register dependencies. bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, - MachineInstr *MI, - MachineBasicBlock *MBB) { + MachineInstr *MI) { for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DefMI = &*DI; @@ -873,15 +858,14 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return false; } -/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// rescheduleKillAboveMI - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the kill instruction above the /// current two-address instruction in order to eliminate the need for the /// copy. -bool -TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass:: +rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { // Bail immediately if we don't have LV available. We use it to find kills // efficiently. 
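A kill, in the sense used here, is the last read of a register in the block; LiveVariables caches these as kill flags so the pass does not have to rescan every operand list. Distilled into a standalone sketch (the Inst type is a hypothetical stand-in for MachineInstr):

#include <vector>

struct Inst { std::vector<unsigned> Uses; };

// Return the index of the instruction that kills Reg, or -1 if Reg is
// never read in the block. The last reader wins.
int findKillIndex(const std::vector<Inst> &Block, unsigned Reg) {
  int Kill = -1;
  for (int i = 0, e = (int)Block.size(); i != e; ++i)
    for (unsigned U : Block[i].Uses)
      if (U == Reg)
        Kill = i;
  return Kill;
}

rescheduleKillAboveMI() relies on exactly this information, hence the bail-out just below when LV is missing.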
if (!LV) @@ -918,7 +902,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (MO.isUse()) { if (!MOReg) continue; - if (isDefTooClose(MOReg, DI->second, MI, MBB)) + if (isDefTooClose(MOReg, DI->second, MI)) return false; if (MOReg == Reg && !MO.isKill()) return false; @@ -1006,18 +990,16 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, return true; } -/// TryInstructionTransform - For the case where an instruction has a single +/// tryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for /// coalescing away the register copy. Returns true if no copy needs to be /// inserted to untie mi's operands (either because they were untied, or /// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: -TryInstructionTransform(MachineBasicBlock::iterator &mi, +tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist, - SmallPtrSet &Processed) { + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -1030,7 +1012,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, bool regBKilled = isKilled(MI, regB, MRI, TII); if (TargetRegisterInfo::isVirtualRegister(regA)) - ScanUses(regA, &*mbbi, Processed); + scanUses(regA); // Check if it is profitable to commute the operands. unsigned SrcOp1, SrcOp2; @@ -1051,7 +1033,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) { + else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -1059,7 +1041,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } // If it's profitable to commute, try to do so. - if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1068,7 +1050,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + if (rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1078,7 +1060,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { // Try to convert it. - if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { + if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) { ++NumConvertedTo3Addr; return true; // Done with this instruction. } @@ -1087,7 +1069,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. 
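Whether a kill can legally and profitably move above this instruction hinges on the slack between each dependence's def and its use, which is what isDefTooClose() earlier in this hunk measures with the per-block DistanceMap. The test reduces to the following (names are illustrative; it assumes the def precedes the use, as in the pass):

// True if hoisting would leave fewer cycles between def and use than
// the def's latency, i.e. the move would only create a stall.
bool defTooClose(unsigned DefDist, unsigned UseDist, unsigned Latency) {
  return UseDist - DefDist < Latency;
}

The rescheduling call that applies this check follows below.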
- if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + if (rescheduleKillAboveMI(mi, nmi, regB)) { ++NumReSchedUps; return true; } @@ -1131,8 +1113,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Tentatively insert the instructions into the block so that they // look "normal" to the transformation logic. - mbbi->insert(mi, NewMIs[0]); - mbbi->insert(mi, NewMIs[1]); + MBB->insert(mi, NewMIs[0]); + MBB->insert(mi, NewMIs[1]); DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] << "2addr: NEW INST: " << *NewMIs[1]); @@ -1142,8 +1124,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = - TryInstructionTransform(NewMI, mi, mbbi, - NewSrcIdx, NewDstIdx, Dist, Processed); + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded @@ -1202,8 +1183,7 @@ bool TwoAddressInstructionPass:: collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { const MCInstrDesc &MCID = MI->getDesc(); bool AnyOps = false; - unsigned NumOps = MI->isInlineAsm() ? - MI->getNumOperands() : MCID.getNumOperands(); + unsigned NumOps = MI->getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; @@ -1373,22 +1353,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); TiedOperandMap TiedOperands; - - SmallPtrSet Processed; - for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); - mbbi != mbbe; ++mbbi) { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MBB = MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); if (mi->isDebugValue()) { @@ -1402,7 +1381,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DistanceMap.insert(std::make_pair(mi, ++Dist)); - ProcessCopy(&*mi, &*mbbi, Processed); + processCopy(&*mi); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. @@ -1427,8 +1406,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && - TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, - Processed)) { + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) { // The tied operands have been eliminated or shifted further down the // block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); @@ -1468,7 +1446,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve // SSA form. It's now safe to de-SSA. 
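A REG_SEQUENCE forms one wide virtual register out of several narrower values, each tagged with a subregister index; eliminating it rewrites the sources as subregister defs of the destination. As a plain-integer model of what the instruction expresses (subregister indices are target-defined, e.g. ARM's D-into-Q pairs; this sketch is illustrative only):

#include <cstdint>

// %q = REG_SEQUENCE %lo, sub_lo, %hi, sub_hi  -- modeled as bit packing.
uint64_t regSequence(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | Lo;
}

The elimination call being renamed follows below.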
- MadeChange |= EliminateRegSequences(); + MadeChange |= eliminateRegSequences(); return MadeChange; } @@ -1515,127 +1493,6 @@ static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { return First; } -/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are -/// EXTRACT_SUBREG from the same register and to the same virtual register -/// with different sub-register indices, attempt to combine the -/// EXTRACT_SUBREGs and pre-coalesce them. e.g. -/// %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 -/// %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 -/// %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 -/// Since D subregs 5, 6 can combine to a Q register, we can coalesce -/// reg1026 to reg1029. -void -TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, - unsigned DstReg) { - SmallSet Seen; - for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { - unsigned SrcReg = Srcs[i]; - if (!Seen.insert(SrcReg)) - continue; - - // Check that the instructions are all in the same basic block. - MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg); - MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg); - if (!SrcDefMI || !DstDefMI || - SrcDefMI->getParent() != DstDefMI->getParent()) - continue; - - // If there are no other uses than copies which feed into - // the reg_sequence, then we might be able to coalesce them. - bool CanCoalesce = true; - SmallVector SrcSubIndices, DstSubIndices; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { - CanCoalesce = false; - break; - } - SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); - DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); - } - - if (!CanCoalesce || SrcSubIndices.size() < 2) - continue; - - // Check that the source subregisters can be combined. - std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); - unsigned NewSrcSubIdx = 0; - if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, - NewSrcSubIdx)) - continue; - - // Check that the destination subregisters can also be combined. - std::sort(DstSubIndices.begin(), DstSubIndices.end()); - unsigned NewDstSubIdx = 0; - if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, - NewDstSubIdx)) - continue; - - // If neither source nor destination can be combined to the full register, - // just give up. This could be improved if it ever matters. - if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) - continue; - - // Now that we know that all the uses are extract_subregs and that those - // subregs can somehow be combined, scan all the extract_subregs again to - // make sure the subregs are in the right order and can be composed. - MachineInstr *SomeMI = 0; - CanCoalesce = true; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - assert(UseMI->isCopy()); - unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); - unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); - assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); - if ((NewDstSubIdx == 0 && - TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || - (NewSrcSubIdx == 0 && - TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { - CanCoalesce = false; - break; - } - // Keep track of one of the uses. 
Preferably the first one which has a - // flag. - if (!SomeMI || UseMI->getOperand(0).isUndef()) - SomeMI = UseMI; - } - if (!CanCoalesce) - continue; - - // Insert a copy to replace the original. - MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, - SomeMI->getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define | - getUndefRegState(SomeMI->getOperand(0).isUndef()), - NewDstSubIdx) - .addReg(SrcReg, 0, NewSrcSubIdx); - - // Remove all the old extract instructions. - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ) { - MachineInstr *UseMI = &*UI; - ++UI; - if (UseMI == CopyMI) - continue; - assert(UseMI->isCopy()); - // Move any kills to the new copy or extract instruction. - if (UseMI->getOperand(1).isKill()) { - CopyMI->getOperand(1).setIsKill(); - if (LV) - // Update live variables - LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); - } - UseMI->eraseFromParent(); - } - } -} - static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, MachineRegisterInfo *MRI) { for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), @@ -1647,7 +1504,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, return false; } -/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part /// of the de-ssa process. This replaces sources of REG_SEQUENCE as /// sub-register references of the register defined by REG_SEQUENCE. e.g. /// @@ -1655,7 +1512,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, /// %reg1031 = REG_SEQUENCE %reg1029, 5, %reg1030, 6 /// => /// %reg1031:5, %reg1031:6 = VLD1q16 %reg1024, ... -bool TwoAddressInstructionPass::EliminateRegSequences() { +bool TwoAddressInstructionPass::eliminateRegSequences() { if (RegSequences.empty()) return false; @@ -1759,10 +1616,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) MO.setIsUndef(); } - // Make sure there is a full non-subreg imp-def operand on the - // instruction. This shouldn't be necessary, but it seems that at least - // RAFast requires it. - Def->addRegisterDefined(DstReg, TRI); DEBUG(dbgs() << "First def: " << *Def); } @@ -1775,12 +1628,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); } - - // Try coalescing some EXTRACT_SUBREG instructions. This can create - // INSERT_SUBREG instructions that must have flags added by - // LiveIntervalAnalysis, so only run it when LiveVariables is available. 
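For context on the code being deleted here: CoalesceExtSubRegs looked for several EXTRACT_SUBREGs pulling adjacent pieces out of one wide value and replaced them with a single whole-register copy. In miniature (plain integers, purely illustrative):

#include <cstdint>

struct Halves { uint32_t Lo, Hi; };

// Two extracts of adjacent halves of the same wide value...
Halves extractBoth(uint64_t Q) {
  return { uint32_t(Q), uint32_t(Q >> 32) };
}

// ...carry no more information than one copy of the whole value.
uint64_t copyWhole(uint64_t Q) { return Q; }

Its now-deleted call site follows below.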
- if (LV) - CoalesceExtSubRegs(RealSrcs, DstReg); } RegSequences.clear(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 93840f0..bb93bdc 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -19,8 +19,8 @@ #define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "LiveDebugVariables.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -127,9 +127,11 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << '\n'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VirtRegMap::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // VirtRegRewriter @@ -170,6 +172,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) @@ -182,6 +185,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -197,11 +202,11 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { VRM = &getAnalysis(); DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); DEBUG(VRM->dump()); // Add kill flags while we still have virtual registers. - LIS->addKillFlags(); + LIS->addKillFlags(VRM); // Live-in lists on basic blocks are required for physregs. addMBBLiveIns(); @@ -252,9 +257,6 @@ void VirtRegRewriter::rewrite() { SmallVector SuperDeads; SmallVector SuperDefs; SmallVector SuperKills; -#ifndef NDEBUG - BitVector Reserved = TRI->getReservedRegs(*MF); -#endif for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { @@ -278,7 +280,7 @@ void VirtRegRewriter::rewrite() { unsigned PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Instruction uses unmapped VirtReg"); - assert(!Reserved.test(PhysReg) && "Reserved register assignment"); + assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. if (MO.getSubReg()) { diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index c320985..7974dda 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -63,8 +63,8 @@ namespace llvm { /// createSpillSlot - Allocate a spill slot for RC from MFI. 
unsigned createSpillSlot(const TargetRegisterClass *RC); - VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT - void operator=(const VirtRegMap&); // DO NOT IMPLEMENT + VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION; + void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION; public: static char ID; diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 441f1e8..1e9e509 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -8,5 +8,6 @@ add_llvm_library(LLVMDebugInfo DWARFDebugAranges.cpp DWARFDebugInfoEntry.cpp DWARFDebugLine.cpp + DWARFDebugRangeList.cpp DWARFFormValue.cpp ) diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp index e2fd55f..691a92c 100644 --- a/lib/DebugInfo/DIContext.cpp +++ b/lib/DebugInfo/DIContext.cpp @@ -18,7 +18,10 @@ DIContext *DIContext::getDWARFContext(bool isLittleEndian, StringRef abbrevSection, StringRef aRangeSection, StringRef lineSection, - StringRef stringSection) { + StringRef stringSection, + StringRef rangeSection, + const RelocAddrMap &Map) { return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection, - aRangeSection, lineSection, stringSection); + aRangeSection, lineSection, stringSection, + rangeSection, Map); } diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp index b27d57b..bdd65b7 100644 --- a/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARFCompileUnit.cpp @@ -63,7 +63,7 @@ DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, Version = debug_info_data.getU16(&offset); bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset(); Abbrevs = abbrevs; - AddrSize = debug_info_data.getU8 (&offset); + AddrSize = debug_info_data.getU8(&offset); bool versionOK = DWARFContext::isSupportedVersion(Version); bool addrSizeOK = AddrSize == 4 || AddrSize == 8; @@ -75,6 +75,15 @@ DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, return 0; } +bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const { + // Require that compile unit is extracted. + assert(DieArray.size() > 0); + DataExtractor RangesData(Context.getRangeSection(), + Context.isLittleEndian(), AddrSize); + return RangeList.extract(RangesData, &RangeListOffset); +} + void DWARFCompileUnit::clear() { Offset = 0; Length = 0; @@ -94,7 +103,9 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { << " (next CU at " << format("0x%08x", getNextCompileUnitOffset()) << ")\n"; - getCompileUnitDIE(false)->dump(OS, this, -1U); + const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false); + assert(CU && "Null Compile Unit?"); + CU->dump(OS, this, -1U); } const char *DWARFCompileUnit::getCompilationDir() { @@ -174,11 +185,11 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { addDIE(die); return 1; } - else if (depth == 0 && initial_die_array_size == 1) { + else if (depth == 0 && initial_die_array_size == 1) // Don't append the CU die as we already did that - } else { - addDIE (die); - } + ; + else + addDIE(die); const DWARFAbbreviationDeclaration *abbrDecl = die.getAbbreviationDeclarationPtr(); @@ -199,9 +210,9 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { // Give a little bit of info if we encounter corrupt DWARF (our offset // should always terminate at or before the start of the next compilation // unit header). 
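The new extractRangeList() above reads from .debug_ranges, whose layout in DWARF 2/3 is a sequence of address-sized (begin, end) offset pairs terminated by a (0, 0) entry; a begin value of all ones instead selects a new base address. A hand-rolled reader for the common case might look like this (the names and the fixed 8-byte address size are assumptions for the sketch, and base-address selection entries are not handled):

#include <cstdint>
#include <utility>
#include <vector>

std::vector<std::pair<uint64_t, uint64_t>>
readRangeList(const uint64_t *P) {
  std::vector<std::pair<uint64_t, uint64_t>> Ranges;
  while (P[0] != 0 || P[1] != 0) {   // (0, 0) ends the list
    Ranges.emplace_back(P[0], P[1]); // offsets relative to the CU base
    P += 2;
  }
  return Ranges;
}

The bounds-check warning that this hunk reformats follows immediately below.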
- if (offset > next_cu_offset) { - fprintf (stderr, "warning: DWARF compile unit extends beyond its bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset); - } + if (offset > next_cu_offset) + fprintf(stderr, "warning: DWARF compile unit extends beyond its" + "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset); setDIERelations(); return DieArray.size(); @@ -244,12 +255,21 @@ DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, clearDIEs(true); } -const DWARFDebugInfoEntryMinimal* -DWARFCompileUnit::getFunctionDIEForAddress(int64_t address) { +DWARFDebugInfoEntryMinimal::InlinedChain +DWARFCompileUnit::getInlinedChainForAddress(uint64_t Address) { + // First, find a subprogram that contains the given address (the root + // of inlined chain). extractDIEsIfNeeded(false); + const DWARFDebugInfoEntryMinimal *SubprogramDIE = 0; for (size_t i = 0, n = DieArray.size(); i != n; i++) { - if (DieArray[i].addressRangeContainsAddress(this, address)) - return &DieArray[i]; + if (DieArray[i].isSubprogramDIE() && + DieArray[i].addressRangeContainsAddress(this, Address)) { + SubprogramDIE = &DieArray[i]; + break; + } } - return 0; + // Get inlined chain rooted at this subprogram DIE. + if (!SubprogramDIE) + return DWARFDebugInfoEntryMinimal::InlinedChain(); + return SubprogramDIE->getInlinedChainForAddress(this, Address); } diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index b34a596..03e2862 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -12,6 +12,7 @@ #include "DWARFDebugAbbrev.h" #include "DWARFDebugInfoEntry.h" +#include "DWARFDebugRangeList.h" #include namespace llvm { @@ -45,6 +46,11 @@ public: /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it /// hasn't already been done. Returns the number of DIEs parsed at this call. size_t extractDIEsIfNeeded(bool cu_die_only); + /// extractRangeList - extracts the range list referenced by this compile + /// unit from .debug_ranges section. Returns true on success. + /// Requires that compile unit is already extracted. + bool extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const; void clear(); void dump(raw_ostream &OS); uint32_t getOffset() const { return Offset; } @@ -106,11 +112,11 @@ public: void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, bool clear_dies_if_already_not_parsed); - /// getFunctionDIEForAddress - Returns pointer to parsed subprogram DIE, - /// address ranges of which contain the provided address, - /// or NULL if there is no such subprogram. The pointer - /// is valid until DWARFCompileUnit::clear() or clearDIEs() is called. - const DWARFDebugInfoEntryMinimal *getFunctionDIEForAddress(int64_t address); + + /// getInlinedChainForAddress - fetches inlined chain for a given address. + /// Returns empty chain if there is no subprogram containing address. 
+ DWARFDebugInfoEntryMinimal::InlinedChain getInlinedChainForAddress( + uint64_t Address); }; } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 797662b..afd614c 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -17,6 +17,8 @@ using namespace llvm; using namespace dwarf; +typedef DWARFDebugLine::LineTable DWARFLineTable; + void DWARFContext::dump(raw_ostream &OS) { OS << ".debug_abbrev contents:\n"; getDebugAbbrev()->dump(OS); @@ -32,15 +34,17 @@ void DWARFContext::dump(raw_ostream &OS) { while (set.extract(arangesData, &offset)) set.dump(OS); + uint8_t savedAddressByteSize = 0; OS << "\n.debug_lines contents:\n"; for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { DWARFCompileUnit *cu = getCompileUnitAtIndex(i); + savedAddressByteSize = cu->getAddressByteSize(); unsigned stmtOffset = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, -1U); if (stmtOffset != -1U) { DataExtractor lineData(getLineSection(), isLittleEndian(), - cu->getAddressByteSize()); + savedAddressByteSize); DWARFDebugLine::DumpingState state(OS); DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); } @@ -54,6 +58,18 @@ void DWARFContext::dump(raw_ostream &OS) { OS << format("0x%8.8x: \"%s\"\n", lastOffset, s); lastOffset = offset; } + + OS << "\n.debug_ranges contents:\n"; + // In fact, different compile units may have different address byte + // sizes, but for simplicity we just use the address byte size of the last + // compile unit (there is no easy and fast way to associate address range + // list and the compile unit it describes). + DataExtractor rangesData(getRangeSection(), isLittleEndian(), + savedAddressByteSize); + offset = 0; + DWARFDebugRangeList rangeList; + while (rangeList.extract(rangesData, &offset)) + rangeList.dump(OS); } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -80,7 +96,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() { return Aranges.get(); } -const DWARFDebugLine::LineTable * +const DWARFLineTable * DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { if (!Line) Line.reset(new DWARFDebugLine()); @@ -92,7 +108,7 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { return 0; // No line table for this compile unit. // See if the line table is cached. - if (const DWARFDebugLine::LineTable *lt = Line->getLineTable(stmtOffset)) + if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset)) return lt; // We have to parse it first. 
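The caching pattern used by getLineTableForCompileUnit() above, distilled: look the statement-list offset up in a cache, and parse only on a miss. Names and types here are illustrative stand-ins:

#include <cstdint>
#include <map>

struct LineTable { /* parsed rows would live here */ };

const LineTable *getOrParse(std::map<uint32_t, LineTable> &Cache,
                            uint32_t StmtOffset) {
  auto It = Cache.find(StmtOffset);
  if (It != Cache.end())
    return &It->second;              // cached from an earlier query
  // Parse from .debug_line at StmtOffset (elided), then cache it.
  return &Cache.emplace(StmtOffset, LineTable{}).first->second;
}

Parsing on demand keeps symbolization cheap when only a few compile units are ever queried.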
@@ -103,11 +119,11 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { void DWARFContext::parseCompileUnits() { uint32_t offset = 0; - const DataExtractor &debug_info_data = DataExtractor(getInfoSection(), - isLittleEndian(), 0); - while (debug_info_data.isValidOffset(offset)) { + const DataExtractor &DIData = DataExtractor(getInfoSection(), + isLittleEndian(), 0); + while (DIData.isValidOffset(offset)) { CUs.push_back(DWARFCompileUnit(*this)); - if (!CUs.back().extract(debug_info_data, &offset)) { + if (!CUs.back().extract(DIData, &offset)) { CUs.pop_back(); break; } @@ -131,75 +147,155 @@ namespace { }; } -DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) { +DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { if (CUs.empty()) parseCompileUnits(); - DWARFCompileUnit *i = std::lower_bound(CUs.begin(), CUs.end(), offset, - OffsetComparator()); - if (i != CUs.end()) - return &*i; + DWARFCompileUnit *CU = std::lower_bound(CUs.begin(), CUs.end(), Offset, + OffsetComparator()); + if (CU != CUs.end()) + return &*CU; return 0; } -DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, - DILineInfoSpecifier specifier) { +DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile unit. - uint32_t cuOffset = getDebugAranges()->findAddress(address); + uint32_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. - DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset); - if (!cu) + return getCompileUnitForOffset(CUOffset); +} + +static bool getFileNameForCompileUnit(DWARFCompileUnit *CU, + const DWARFLineTable *LineTable, + uint64_t FileIndex, + bool NeedsAbsoluteFilePath, + std::string &FileName) { + if (CU == 0 || + LineTable == 0 || + !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath, + FileName)) + return false; + if (NeedsAbsoluteFilePath && sys::path::is_relative(FileName)) { + // We may still need to append compilation directory of compile unit. + SmallString<16> AbsolutePath; + if (const char *CompilationDir = CU->getCompilationDir()) { + sys::path::append(AbsolutePath, CompilationDir); + } + sys::path::append(AbsolutePath, FileName); + FileName = AbsolutePath.str(); + } + return true; +} + +static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU, + const DWARFLineTable *LineTable, + uint64_t Address, + bool NeedsAbsoluteFilePath, + std::string &FileName, + uint32_t &Line, uint32_t &Column) { + if (CU == 0 || LineTable == 0) + return false; + // Get the index of row we're looking for in the line table. + uint32_t RowIndex = LineTable->lookupAddress(Address); + if (RowIndex == -1U) + return false; + // Take file number and line/column from the row. 
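lookupAddress() above implements the standard line-table query: rows are ordered by address, and the answer is the last row at or before the query address. Over a plain vector the same search looks like this (types and names are stand-ins for the sketch):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Row { uint64_t Address; uint32_t File, Line, Column; };

// Index of the last row with Address <= Addr, or -1 if Addr precedes
// the whole table.
int lookupAddress(const std::vector<Row> &Rows, uint64_t Addr) {
  auto It = std::upper_bound(
      Rows.begin(), Rows.end(), Addr,
      [](uint64_t A, const Row &R) { return A < R.Address; });
  if (It == Rows.begin())
    return -1;
  return int(It - Rows.begin()) - 1;
}

The row found this way supplies the file, line, and column consumed just below.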
+ const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; + if (!getFileNameForCompileUnit(CU, LineTable, Row.File, + NeedsAbsoluteFilePath, FileName)) + return false; + Line = Row.Line; + Column = Row.Column; + return true; +} + +DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier) { + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) return DILineInfo(); - SmallString<16> fileName(""); - SmallString<16> functionName(""); - uint32_t line = 0; - uint32_t column = 0; - if (specifier.needs(DILineInfoSpecifier::FunctionName)) { - const DWARFDebugInfoEntryMinimal *function_die = - cu->getFunctionDIEForAddress(address); - if (function_die) { - if (const char *name = function_die->getSubprogramName(cu)) - functionName = name; + std::string FileName = ""; + std::string FunctionName = ""; + uint32_t Line = 0; + uint32_t Column = 0; + if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { + // The address may correspond to instruction in some inlined function, + // so we have to build the chain of inlined functions and take the + // name of the topmost function in it. + const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.size() > 0) { + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0]; + if (const char *Name = TopFunctionDIE.getSubroutineName(CU)) + FunctionName = Name; } } - if (specifier.needs(DILineInfoSpecifier::FileLineInfo)) { - // Get the line table for this compile unit. - const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); - if (lineTable) { - // Get the index of the row we're looking for in the line table. - uint32_t rowIndex = lineTable->lookupAddress(address); - if (rowIndex != -1U) { - const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; - // Take file/line info from the line table. - const DWARFDebugLine::FileNameEntry &fileNameEntry = - lineTable->Prologue.FileNames[row.File - 1]; - fileName = fileNameEntry.Name; - if (specifier.needs(DILineInfoSpecifier::AbsoluteFilePath) && - sys::path::is_relative(fileName.str())) { - // Append include directory of file (if it is present in line table) - // and compilation directory of compile unit to make path absolute. 
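Both the removed code below and its replacement, getFileNameForCompileUnit(), follow the same DWARF convention: a relative file name is resolved against its include directory, and a relative include directory against the compile unit's compilation directory. In portable form (a sketch using std::filesystem, not the code in this patch):

#include <filesystem>
#include <string>

std::string makeAbsolute(const std::string &File,
                         const std::string &IncludeDir,
                         const std::string &CompDir) {
  namespace fs = std::filesystem;
  fs::path P(File);
  if (P.is_relative()) {
    fs::path Dir(IncludeDir.empty() ? CompDir : IncludeDir);
    if (Dir.is_relative())
      Dir = fs::path(CompDir) / Dir;   // include dir itself is relative
    P = Dir / P;
  }
  return P.string();
}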
- const char *includeDir = 0; - if (uint64_t includeDirIndex = fileNameEntry.DirIdx) { - includeDir = lineTable->Prologue - .IncludeDirectories[includeDirIndex - 1]; - } - SmallString<16> absFileName; - if (includeDir == 0 || sys::path::is_relative(includeDir)) { - if (const char *compilationDir = cu->getCompilationDir()) - sys::path::append(absFileName, compilationDir); - } - if (includeDir) { - sys::path::append(absFileName, includeDir); - } - sys::path::append(absFileName, fileName.str()); - fileName = absFileName; - } - line = row.Line; - column = row.Column; + if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); + const bool NeedsAbsoluteFilePath = + Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + getFileLineInfoForCompileUnit(CU, LineTable, Address, + NeedsAbsoluteFilePath, + FileName, Line, Column); + } + return DILineInfo(StringRef(FileName), StringRef(FunctionName), + Line, Column); +} + +DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier) { + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) + return DIInliningInfo(); + + const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.size() == 0) + return DIInliningInfo(); + + DIInliningInfo InliningInfo; + uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; + const DWARFLineTable *LineTable = 0; + for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) { + const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i]; + std::string FileName = ""; + std::string FunctionName = ""; + uint32_t Line = 0; + uint32_t Column = 0; + // Get function name if necessary. + if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { + if (const char *Name = FunctionDIE.getSubroutineName(CU)) + FunctionName = Name; + } + if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + const bool NeedsAbsoluteFilePath = + Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + if (i == 0) { + // For the topmost frame, initialize the line table of this + // compile unit and fetch file/line info from it. + LineTable = getLineTableForCompileUnit(CU); + // For the topmost routine, get file/line info from line table. + getFileLineInfoForCompileUnit(CU, LineTable, Address, + NeedsAbsoluteFilePath, + FileName, Line, Column); + } else { + // Otherwise, use call file, call line and call column from + // previous DIE in inlined chain. + getFileNameForCompileUnit(CU, LineTable, CallFile, + NeedsAbsoluteFilePath, FileName); + Line = CallLine; + Column = CallColumn; + } + // Get call file/line/column of a current DIE. + if (i + 1 < n) { + FunctionDIE.getCallerFrame(CU, CallFile, CallLine, CallColumn); } } + DILineInfo Frame(StringRef(FileName), StringRef(FunctionName), + Line, Column); + InliningInfo.addFrame(Frame); } - return DILineInfo(fileName, functionName, line, column); + return InliningInfo; } void DWARFContextInMemory::anchor() { } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index e55a27e..4001792 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -13,6 +13,7 @@ #include "DWARFCompileUnit.h" #include "DWARFDebugAranges.h" #include "DWARFDebugLine.h" +#include "DWARFDebugRangeList.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -25,21 +26,24 @@ namespace llvm { /// methods that a concrete implementation provides. 
 class DWARFContext : public DIContext {
   bool IsLittleEndian;
+  const RelocAddrMap &RelocMap;
 
   SmallVector<DWARFCompileUnit, 1> CUs;
   OwningPtr<DWARFDebugAbbrev> Abbrev;
   OwningPtr<DWARFDebugAranges> Aranges;
   OwningPtr<DWARFDebugLine> Line;
 
-  DWARFContext(DWARFContext &); // = delete
-  DWARFContext &operator=(DWARFContext &); // = delete
+  DWARFContext(DWARFContext &) LLVM_DELETED_FUNCTION;
+  DWARFContext &operator=(DWARFContext &) LLVM_DELETED_FUNCTION;
 
   /// Read compile units from the debug_info section and store them in CUs.
   void parseCompileUnits();
 protected:
-  DWARFContext(bool isLittleEndian) : IsLittleEndian(isLittleEndian) {}
+  DWARFContext(bool isLittleEndian, const RelocAddrMap &Map) :
+    IsLittleEndian(isLittleEndian), RelocMap(Map) {}
 
 public:
   virtual void dump(raw_ostream &OS);
+
   /// Get the number of compile units in this context.
   unsigned getNumCompileUnits() {
     if (CUs.empty())
@@ -53,9 +57,6 @@ public:
     return &CUs[index];
   }
 
-  /// Return the compile unit that includes an offset (relative to .debug_info).
-  DWARFCompileUnit *getCompileUnitForOffset(uint32_t offset);
-
   /// Get a pointer to the parsed DebugAbbrev object.
   const DWARFDebugAbbrev *getDebugAbbrev();
 
@@ -66,22 +67,32 @@ public:
   const DWARFDebugLine::LineTable *
   getLineTableForCompileUnit(DWARFCompileUnit *cu);
 
-  virtual DILineInfo getLineInfoForAddress(uint64_t address,
-      DILineInfoSpecifier specifier = DILineInfoSpecifier());
+  virtual DILineInfo getLineInfoForAddress(uint64_t Address,
+      DILineInfoSpecifier Specifier = DILineInfoSpecifier());
+  virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
+      DILineInfoSpecifier Specifier = DILineInfoSpecifier());
 
   bool isLittleEndian() const { return IsLittleEndian; }
+  const RelocAddrMap &relocMap() const { return RelocMap; }
 
   virtual StringRef getInfoSection() = 0;
   virtual StringRef getAbbrevSection() = 0;
   virtual StringRef getARangeSection() = 0;
   virtual StringRef getLineSection() = 0;
   virtual StringRef getStringSection() = 0;
+  virtual StringRef getRangeSection() = 0;
 
   static bool isSupportedVersion(unsigned version) {
     return version == 2 || version == 3;
   }
-};
+private:
+  /// Return the compile unit that includes an offset (relative to .debug_info).
+  DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset);
+
+  /// Return the compile unit which contains instruction with provided
+  /// address.
+  DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
+};
 
 /// DWARFContextInMemory is the simplest possible implementation of a
 /// DWARFContext. It assumes all content is available in memory and stores
@@ -93,19 +104,23 @@ class DWARFContextInMemory : public DWARFContext {
   StringRef ARangeSection;
   StringRef LineSection;
   StringRef StringSection;
+  StringRef RangeSection;
 
 public:
   DWARFContextInMemory(bool isLittleEndian,
                        StringRef infoSection,
                        StringRef abbrevSection,
                        StringRef aRangeSection,
                        StringRef lineSection,
-                       StringRef stringSection)
-    : DWARFContext(isLittleEndian),
+                       StringRef stringSection,
+                       StringRef rangeSection,
+                       const RelocAddrMap &Map = RelocAddrMap())
+    : DWARFContext(isLittleEndian, Map),
       InfoSection(infoSection),
       AbbrevSection(abbrevSection),
       ARangeSection(aRangeSection),
       LineSection(lineSection),
-      StringSection(stringSection)
+      StringSection(stringSection),
+      RangeSection(rangeSection)
     {}
 
   virtual StringRef getInfoSection() { return InfoSection; }
@@ -113,6 +128,7 @@ public:
   virtual StringRef getARangeSection() { return ARangeSection; }
   virtual StringRef getLineSection() { return LineSection; }
   virtual StringRef getStringSection() { return StringSection; }
+  virtual StringRef getRangeSection() { return RangeSection; }
 };
 
 }
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index ef470e5..f9a34c9 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -62,7 +62,6 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
     uint32_t offset = 0;
 
     typedef std::vector<DWARFDebugArangeSet> SetCollection;
-    typedef SetCollection::const_iterator SetCollectionIter;
     SetCollection sets;
 
     DWARFDebugArangeSet set;
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 429a36c..ab67464 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugInfoEntry.cpp --------------------------------------------===//
+//===-- DWARFDebugInfoEntry.cpp -------------------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -101,7 +101,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
   DataExtractor debug_info_data = cu->getDebugInfoExtractor();
   uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr);
 
-  assert (fixed_form_sizes); // For best performance this should be specified!
+  assert(fixed_form_sizes); // For best performance this should be specified!
 
   if (abbrCode) {
     uint32_t offset = *offset_ptr;
@@ -126,6 +126,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
       switch (form) {
       // Blocks if inlined data that have a length field and the data bytes
      // inlined in the .debug_info.
+      case DW_FORM_exprloc:
      case DW_FORM_block:
        form_size = debug_info_data.getULEB128(&offset);
        break;
@@ -150,6 +151,11 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
        form_size = cu->getAddressByteSize();
        break;
 
+      // 0 sized form.
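//      (Editorial note, not part of the patch: a worked example of the
//      zero-sized DWARF 4 form handled below. An abbreviation declaring
//      DW_AT_external DW_FORM_flag_present makes the flag "true" purely by
//      being listed in the abbreviation; the DIE itself encodes no bytes,
//      so the reader advances the offset by 0, in contrast to the one byte
//      consumed by DW_FORM_flag.)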
+ case DW_FORM_flag_present: + form_size = 0; + break; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -173,6 +179,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: form_size = 8; break; @@ -188,6 +195,13 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form = debug_info_data.getULEB128(&offset); break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + debug_info_data.getU32(offset_ptr); + else + debug_info_data.getU64(offset_ptr); + break; + default: *offset_ptr = Offset; return false; @@ -249,6 +263,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, switch (form) { // Blocks if inlined data that have a length field and the data // bytes // inlined in the .debug_info + case DW_FORM_exprloc: case DW_FORM_block: form_size = debug_info_data.getULEB128(&offset); break; @@ -273,6 +288,11 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_size = cu_addr_size; break; + // 0 byte value + case DW_FORM_flag_present: + form_size = 0; + break; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -299,6 +319,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: form_size = 8; break; @@ -314,6 +335,13 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_is_indirect = true; break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + debug_info_data.getU32(offset_ptr); + else + debug_info_data.getU64(offset_ptr); + break; + default: *offset_ptr = offset; return false; @@ -336,6 +364,16 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, return false; } +bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const { + return getTag() == DW_TAG_subprogram; +} + +bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const { + uint32_t Tag = getTag(); + return Tag == DW_TAG_subprogram || + Tag == DW_TAG_inlined_subroutine; +} + uint32_t DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, const uint16_t attr, @@ -373,9 +411,10 @@ DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, const char* DWARFDebugInfoEntryMinimal::getAttributeValueAsString( - const DWARFCompileUnit* cu, - const uint16_t attr, - const char* fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + const char* fail_value) + const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) { DataExtractor stringExtractor(cu->getContext().getStringSection(), @@ -387,9 +426,9 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsString( uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned( - const DWARFCompileUnit* cu, - const uint16_t attr, - uint64_t fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + uint64_t fail_value) const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) return form_value.getUnsigned(); @@ -398,9 +437,9 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned( int64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned( - const DWARFCompileUnit* cu, - const uint16_t attr, - int64_t fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + int64_t fail_value) const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) return form_value.getSigned(); @@ -409,33 +448,42 @@ 
DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned( uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( - const DWARFCompileUnit* cu, - const uint16_t attr, - uint64_t fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + uint64_t fail_value) + const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) return form_value.getReference(cu); return fail_value; } +bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU, + uint64_t &LowPC, + uint64_t &HighPC) const { + HighPC = -1ULL; + LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL); + if (LowPC != -1ULL) + HighPC = getAttributeValueAsUnsigned(CU, DW_AT_high_pc, -1ULL); + return (HighPC != -1ULL); +} + void -DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, - DWARFDebugAranges *debug_aranges) +DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU, + DWARFDebugAranges *DebugAranges) const { if (AbbrevDecl) { - uint16_t tag = AbbrevDecl->getTag(); - if (tag == DW_TAG_subprogram) { - uint64_t hi_pc = -1ULL; - uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); - if (lo_pc != -1ULL) - hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); - if (hi_pc != -1ULL) - debug_aranges->appendRange(cu->getOffset(), lo_pc, hi_pc); + if (isSubprogramDIE()) { + uint64_t LowPC, HighPC; + if (getLowAndHighPC(CU, LowPC, HighPC)) { + DebugAranges->appendRange(CU->getOffset(), LowPC, HighPC); + } + // FIXME: try to append ranges from .debug_ranges section. } const DWARFDebugInfoEntryMinimal *child = getFirstChild(); while (child) { - child->buildAddressRangeTable(cu, debug_aranges); + child->buildAddressRangeTable(CU, DebugAranges); child = child->getSibling(); } } @@ -443,51 +491,95 @@ DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( - const DWARFCompileUnit *cu, const uint64_t address) const { - if (!isNULL() && getTag() == DW_TAG_subprogram) { - uint64_t hi_pc = -1ULL; - uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); - if (lo_pc != -1ULL) - hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); - if (hi_pc != -1ULL) { - return (lo_pc <= address && address < hi_pc); - } + const DWARFCompileUnit *CU, + const uint64_t Address) + const { + if (isNULL()) + return false; + uint64_t LowPC, HighPC; + if (getLowAndHighPC(CU, LowPC, HighPC)) + return (LowPC <= Address && Address <= HighPC); + // Try to get address ranges from .debug_ranges section. + uint32_t RangesOffset = getAttributeValueAsReference(CU, DW_AT_ranges, -1U); + if (RangesOffset != -1U) { + DWARFDebugRangeList RangeList; + if (CU->extractRangeList(RangesOffset, RangeList)) + return RangeList.containsAddress(CU->getBaseAddress(), Address); } return false; } const char* -DWARFDebugInfoEntryMinimal::getSubprogramName( - const DWARFCompileUnit *cu) const { - if (isNULL() || getTag() != DW_TAG_subprogram) +DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFCompileUnit *CU) + const { + if (!isSubroutineDIE()) return 0; // Try to get mangled name if possible. 
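// (Editorial summary of the lookup order implemented below, for reference:
//  1. DW_AT_MIPS_linkage_name / DW_AT_linkage_name (the mangled name);
//  2. DW_AT_name (the short name);
//  3. the same search applied recursively to the DIE referenced by
//     DW_AT_specification, then by DW_AT_abstract_origin.)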
if (const char *name = - getAttributeValueAsString(cu, DW_AT_MIPS_linkage_name, 0)) + getAttributeValueAsString(CU, DW_AT_MIPS_linkage_name, 0)) return name; - if (const char *name = getAttributeValueAsString(cu, DW_AT_linkage_name, 0)) + if (const char *name = getAttributeValueAsString(CU, DW_AT_linkage_name, 0)) return name; - if (const char *name = getAttributeValueAsString(cu, DW_AT_name, 0)) + if (const char *name = getAttributeValueAsString(CU, DW_AT_name, 0)) return name; // Try to get name from specification DIE. uint32_t spec_ref = - getAttributeValueAsReference(cu, DW_AT_specification, -1U); + getAttributeValueAsReference(CU, DW_AT_specification, -1U); if (spec_ref != -1U) { DWARFDebugInfoEntryMinimal spec_die; - if (spec_die.extract(cu, &spec_ref)) { - if (const char *name = spec_die.getSubprogramName(cu)) + if (spec_die.extract(CU, &spec_ref)) { + if (const char *name = spec_die.getSubroutineName(CU)) return name; } } // Try to get name from abstract origin DIE. uint32_t abs_origin_ref = - getAttributeValueAsReference(cu, DW_AT_abstract_origin, -1U); + getAttributeValueAsReference(CU, DW_AT_abstract_origin, -1U); if (abs_origin_ref != -1U) { DWARFDebugInfoEntryMinimal abs_origin_die; - if (abs_origin_die.extract(cu, &abs_origin_ref)) { - if (const char *name = abs_origin_die.getSubprogramName(cu)) + if (abs_origin_die.extract(CU, &abs_origin_ref)) { + if (const char *name = abs_origin_die.getSubroutineName(CU)) return name; } } return 0; } + +void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFCompileUnit *CU, + uint32_t &CallFile, + uint32_t &CallLine, + uint32_t &CallColumn) const { + CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0); + CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0); + CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0); +} + +DWARFDebugInfoEntryMinimal::InlinedChain +DWARFDebugInfoEntryMinimal::getInlinedChainForAddress( + const DWARFCompileUnit *CU, + const uint64_t Address) + const { + DWARFDebugInfoEntryMinimal::InlinedChain InlinedChain; + if (isNULL()) + return InlinedChain; + for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) { + // Append current DIE to inlined chain only if it has correct tag + // (e.g. it is not a lexical block). + if (DIE->isSubroutineDIE()) { + InlinedChain.push_back(*DIE); + } + // Try to get child which also contains provided address. + const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild(); + while (Child) { + if (Child->addressRangeContainsAddress(CU, Address)) { + // Assume there is only one such child. + break; + } + Child = Child->getSibling(); + } + DIE = Child; + } + // Reverse the obtained chain to make the root of inlined chain last. + std::reverse(InlinedChain.begin(), InlinedChain.end()); + return InlinedChain; +} diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index d5d86b9..9c1b2be 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #include "DWARFAbbreviationDeclaration.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -19,6 +20,7 @@ class DWARFDebugAranges; class DWARFCompileUnit; class DWARFContext; class DWARFFormValue; +class DWARFInlinedSubroutineChain; /// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. class DWARFDebugInfoEntryMinimal { @@ -52,6 +54,13 @@ public: uint32_t getTag() const { return AbbrevDecl ? 
AbbrevDecl->getTag() : 0; }
   bool isNULL() const { return AbbrevDecl == 0; }
+
+  /// Returns true if DIE represents a subprogram (not inlined).
+  bool isSubprogramDIE() const;
+  /// Returns true if DIE represents a subprogram or an inlined
+  /// subroutine.
+  bool isSubroutineDIE() const;
+
   uint32_t getOffset() const { return Offset; }
   uint32_t getNumAttributes() const {
     return !isNULL() ? AbbrevDecl->getNumAttributes() : 0;
@@ -126,17 +135,40 @@ public:
                                           const uint16_t attr,
                                           int64_t fail_value) const;
 
-  void buildAddressRangeTable(const DWARFCompileUnit *cu,
-                              DWARFDebugAranges *debug_aranges) const;
-
-  bool addressRangeContainsAddress(const DWARFCompileUnit *cu,
-                                   const uint64_t address) const;
-
-  // If a DIE represents a subprogram, returns its mangled name
-  // (or short name, if mangled is missing). This name may be fetched
-  // from specification or abstract origin for this subprogram.
-  // Returns null if no name is found.
-  const char* getSubprogramName(const DWARFCompileUnit *cu) const;
+  /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
+  /// Returns true if both attributes are present.
+  bool getLowAndHighPC(const DWARFCompileUnit *CU,
+                       uint64_t &LowPC, uint64_t &HighPC) const;
+
+  void buildAddressRangeTable(const DWARFCompileUnit *CU,
+                              DWARFDebugAranges *DebugAranges) const;
+
+  bool addressRangeContainsAddress(const DWARFCompileUnit *CU,
+                                   const uint64_t Address) const;
+
+  /// If a DIE represents a subprogram (or inlined subroutine),
+  /// returns its mangled name (or short name, if mangled is missing).
+  /// This name may be fetched from specification or abstract origin
+  /// for this subprogram. Returns null if no name is found.
+  const char* getSubroutineName(const DWARFCompileUnit *CU) const;
+
+  /// Retrieves values of DW_AT_call_file, DW_AT_call_line and
+  /// DW_AT_call_column from DIE (or zeroes if they are missing).
+  void getCallerFrame(const DWARFCompileUnit *CU, uint32_t &CallFile,
+                      uint32_t &CallLine, uint32_t &CallColumn) const;
+
+  /// InlinedChain - represents a chain of inlined_subroutine
+  /// DIEs, (possibly ending with subprogram DIE), all of which are contained
+  /// in some concrete inlined instance tree. Address range for each DIE
+  /// (except the last DIE) in this chain is contained in address
+  /// range for next DIE in the chain.
+  typedef SmallVector<DWARFDebugInfoEntryMinimal, 4> InlinedChain;
+
+  /// Get inlined chain for a given address, rooted at the current DIE.
+  /// Returns empty chain if address is not contained in address range
+  /// of current DIE.
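  // (Editorial sketch, not part of the patch; local names are illustrative.
  // For a PC inside a doubly-inlined call,
  //
  //   InlinedChain Chain = CUDie.getInlinedChainForAddress(CU, PC);
  //
  // yields Chain[0] as the deepest inlined_subroutine containing PC and
  // Chain.back() as the enclosing concrete subprogram, which is the ordering
  // DWARFContext::getLineInfoForAddress relies on when it reads the name of
  // the topmost frame from InlinedChain[0].)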
+  InlinedChain getInlinedChainForAddress(const DWARFCompileUnit *CU,
+                                         const uint64_t Address) const;
 };
 
 }
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index d99575d..267364a 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -10,6 +10,7 @@
 #include "DWARFDebugLine.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 using namespace llvm;
@@ -513,3 +514,29 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
   }
   return index;
 }
+
+bool
+DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+                                              bool NeedsAbsoluteFilePath,
+                                              std::string &Result) const {
+  if (FileIndex == 0 || FileIndex > Prologue.FileNames.size())
+    return false;
+  const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+  const char *FileName = Entry.Name;
+  if (!NeedsAbsoluteFilePath ||
+      sys::path::is_absolute(FileName)) {
+    Result = FileName;
+    return true;
+  }
+  SmallString<16> FilePath;
+  uint64_t IncludeDirIndex = Entry.DirIdx;
+  // Be defensive about the contents of Entry.
+  if (IncludeDirIndex > 0 &&
+      IncludeDirIndex <= Prologue.IncludeDirectories.size()) {
+    const char *IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
+    sys::path::append(FilePath, IncludeDir);
+  }
+  sys::path::append(FilePath, FileName);
+  Result = FilePath.str();
+  return true;
+}
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index 6382b45..586dd7e 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -12,6 +12,7 @@
 #include "llvm/Support/DataExtractor.h"
 #include <map>
+#include <string>
 #include <vector>
 
 namespace llvm {
@@ -174,6 +175,13 @@ public:
     // Returns the index of the row with file/line info for a given address,
     // or -1 if there is no such row.
     uint32_t lookupAddress(uint64_t address) const;
+
+    // Extracts filename by its index in filename table in prologue.
+    // Returns true on success.
+    bool getFileNameByIndex(uint64_t FileIndex,
+                            bool NeedsAbsoluteFilePath,
+                            std::string &Result) const;
+
     void dump(raw_ostream &OS) const;
 
     struct Prologue Prologue;
diff --git a/lib/DebugInfo/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARFDebugRangeList.cpp
new file mode 100644
index 0000000..1806bee
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugRangeList.cpp
@@ -0,0 +1,67 @@
+//===-- DWARFDebugRangesList.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugRangeList.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void DWARFDebugRangeList::clear() {
+  Offset = -1U;
+  AddressSize = 0;
+  Entries.clear();
+}
+
+bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) {
+  clear();
+  if (!data.isValidOffset(*offset_ptr))
+    return false;
+  AddressSize = data.getAddressSize();
+  if (AddressSize != 4 && AddressSize != 8)
+    return false;
+  Offset = *offset_ptr;
+  while (true) {
+    RangeListEntry entry;
+    uint32_t prev_offset = *offset_ptr;
+    entry.StartAddress = data.getAddress(offset_ptr);
+    entry.EndAddress = data.getAddress(offset_ptr);
+    // Check that both values were extracted correctly.
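//    (Editorial worked example, not part of the patch, assuming 4-byte
//    addresses. The byte stream
//       00 10 00 00  00 20 00 00   decodes to range [0x1000, 0x2000)
//                                  relative to the current base address;
//       ff ff ff ff  00 00 40 00   is a base address selection entry
//                                  setting base = 0x400000;
//       00 10 00 00  00 20 00 00   now means [0x401000, 0x402000);
//       00 00 00 00  00 00 00 00   is the end-of-list entry.)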
+    if (*offset_ptr != prev_offset + 2 * AddressSize) {
+      clear();
+      return false;
+    }
+    if (entry.isEndOfListEntry())
+      break;
+    Entries.push_back(entry);
+  }
+  return true;
+}
+
+void DWARFDebugRangeList::dump(raw_ostream &OS) const {
+  for (int i = 0, n = Entries.size(); i != n; ++i) {
+    const char *format_str = (AddressSize == 4
+                              ? "%08x %08" PRIx64 " %08" PRIx64 "\n"
+                              : "%08x %016" PRIx64 " %016" PRIx64 "\n");
+    OS << format(format_str, Offset, Entries[i].StartAddress,
+                 Entries[i].EndAddress);
+  }
+  OS << format("%08x <End of list>\n", Offset);
+}
+
+bool DWARFDebugRangeList::containsAddress(uint64_t BaseAddress,
+                                          uint64_t Address) const {
+  for (int i = 0, n = Entries.size(); i != n; ++i) {
+    if (Entries[i].isBaseAddressSelectionEntry(AddressSize))
+      BaseAddress = Entries[i].EndAddress;
+    else if (Entries[i].containsAddress(BaseAddress, Address))
+      return true;
+  }
+  return false;
+}
diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h
new file mode 100644
index 0000000..4e34a91
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugRangeList.h
@@ -0,0 +1,78 @@
+//===-- DWARFDebugRangeList.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+#define LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+
+#include "llvm/Support/DataExtractor.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugRangeList {
+public:
+  struct RangeListEntry {
+    // A beginning address offset. This address offset has the size of an
+    // address and is relative to the applicable base address of the
+    // compilation unit referencing this range list. It marks the beginning
+    // of an address range.
+    uint64_t StartAddress;
+    // An ending address offset. This address offset again has the size of
+    // an address and is relative to the applicable base address of the
+    // compilation unit referencing this range list. It marks the first
+    // address past the end of the address range. The ending address must
+    // be greater than or equal to the beginning address.
+    uint64_t EndAddress;
+    // The end of any given range list is marked by an end of list entry,
+    // which consists of a 0 for the beginning address offset
+    // and a 0 for the ending address offset.
+    bool isEndOfListEntry() const {
+      return (StartAddress == 0) && (EndAddress == 0);
+    }
+    // A base address selection entry consists of:
+    // 1. The value of the largest representable address offset
+    //    (for example, 0xffffffff when the size of an address is 32 bits).
+    // 2. An address, which defines the appropriate base address for
+    //    use in interpreting the beginning and ending address offsets of
+    //    subsequent entries of the location list.
+    bool isBaseAddressSelectionEntry(uint8_t AddressSize) const {
+      assert(AddressSize == 4 || AddressSize == 8);
+      if (AddressSize == 4)
+        return StartAddress == -1U;
+      else
+        return StartAddress == -1ULL;
+    }
+    bool containsAddress(uint64_t BaseAddress, uint64_t Address) const {
+      return (BaseAddress + StartAddress <= Address) &&
+             (Address < BaseAddress + EndAddress);
+    }
+  };
+
+private:
+  // Offset in .debug_ranges section.
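  // (Editorial sketch of a typical lookup, mirroring how
  // DWARFDebugInfoEntryMinimal::addressRangeContainsAddress in this patch
  // wires the pieces together; RangesOffset and Addr are illustrative:
  //
  //   DWARFDebugRangeList RL;
  //   if (CU->extractRangeList(RangesOffset, RL) &&
  //       RL.containsAddress(CU->getBaseAddress(), Addr))
  //     ...;  // Addr lies in one of the DIE's non-contiguous ranges
  // )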
+  uint32_t Offset;
+  uint8_t AddressSize;
+  std::vector<RangeListEntry> Entries;
+
+public:
+  DWARFDebugRangeList() { clear(); }
+  void clear();
+  void dump(raw_ostream &OS) const;
+  bool extract(DataExtractor data, uint32_t *offset_ptr);
+  /// containsAddress - Returns true if range list contains the given
+  /// address. Has to be passed base address of the compile unit that
+  /// references this range list.
+  bool containsAddress(uint64_t BaseAddress, uint64_t Address) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index ee2a3ab..fea9fd7 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -10,6 +10,7 @@
 #include "DWARFFormValue.h"
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -41,6 +42,10 @@ static const uint8_t form_sizes_addr4[] = {
   8, // 0x14 DW_FORM_ref8
   0, // 0x15 DW_FORM_ref_udata
   0, // 0x16 DW_FORM_indirect
+  4, // 0x17 DW_FORM_sec_offset
+  0, // 0x18 DW_FORM_exprloc
+  0, // 0x19 DW_FORM_flag_present
+  8, // 0x20 DW_FORM_ref_sig8
 };
 
 static const uint8_t form_sizes_addr8[] = {
@@ -67,6 +72,10 @@ static const uint8_t form_sizes_addr8[] = {
   8, // 0x14 DW_FORM_ref8
   0, // 0x15 DW_FORM_ref_udata
   0, // 0x16 DW_FORM_indirect
+  8, // 0x17 DW_FORM_sec_offset
+  0, // 0x18 DW_FORM_exprloc
+  0, // 0x19 DW_FORM_flag_present
+  8, // 0x20 DW_FORM_ref_sig8
 };
 
 const uint8_t *
@@ -90,9 +99,18 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
   indirect = false;
   switch (Form) {
   case DW_FORM_addr:
-  case DW_FORM_ref_addr:
-    Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+  case DW_FORM_ref_addr: {
+    RelocAddrMap::const_iterator AI
+      = cu->getContext().relocMap().find(*offset_ptr);
+    if (AI != cu->getContext().relocMap().end()) {
+      const std::pair<uint8_t, int64_t> &R = AI->second;
+      Value.uval = R.second;
+      *offset_ptr += R.first;
+    } else
+      Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+  }
     break;
+  case DW_FORM_exprloc:
   case DW_FORM_block:
     Value.uval = data.getULEB128(offset_ptr);
     is_block = true;
@@ -129,9 +147,17 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
   case DW_FORM_sdata:
     Value.sval = data.getSLEB128(offset_ptr);
     break;
-  case DW_FORM_strp:
-    Value.uval = data.getU32(offset_ptr);
+  case DW_FORM_strp: {
+    RelocAddrMap::const_iterator AI
+      = cu->getContext().relocMap().find(*offset_ptr);
+    if (AI != cu->getContext().relocMap().end()) {
+      const std::pair<uint8_t, int64_t> &R = AI->second;
+      Value.uval = R.second;
+      *offset_ptr += R.first;
+    } else
+      Value.uval = data.getU32(offset_ptr);
     break;
+  }
   case DW_FORM_udata:
   case DW_FORM_ref_udata:
     Value.uval = data.getULEB128(offset_ptr);
@@ -141,12 +167,24 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
     // Set the string value to also be the data for inlined cstr form
     // values only so we can tell the difference between DW_FORM_string
     // and DW_FORM_strp form values
-    Value.data = (uint8_t*)Value.cstr;
+    Value.data = (const uint8_t*)Value.cstr;
     break;
   case DW_FORM_indirect:
     Form = data.getULEB128(offset_ptr);
     indirect = true;
     break;
+  case DW_FORM_sec_offset:
+    if (cu->getAddressByteSize() == 4)
+      Value.uval = data.getU32(offset_ptr);
+    else
+      Value.uval = data.getU64(offset_ptr);
+    break;
+  case DW_FORM_flag_present:
+    Value.uval = 1;
+    break;
+  case DW_FORM_ref_sig8:
+    Value.uval =
data.getU64(offset_ptr); + break; default: return false; } @@ -179,6 +217,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info + case DW_FORM_exprloc: case DW_FORM_block: { uint64_t size = debug_info_data.getULEB128(offset_ptr); *offset_ptr += size; @@ -211,6 +250,10 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, *offset_ptr += cu->getAddressByteSize(); return true; + // 0 byte values - implied from the form. + case DW_FORM_flag_present: + return true; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -234,6 +277,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: *offset_ptr += 8; return true; @@ -249,6 +293,15 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, indirect = true; form = debug_info_data.getULEB128(offset_ptr); break; + + // 4 for DWARF32, 8 for DWARF64. + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + *offset_ptr += 4; + else + *offset_ptr += 8; + return true; + default: return false; } @@ -264,22 +317,26 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { switch (Form) { case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_flag_present: OS << "true"; break; case DW_FORM_flag: case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break; case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break; + case DW_FORM_ref_sig8: case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_string: OS << '"'; OS.write_escaped(getAsCString(NULL)); OS << '"'; break; + case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: case DW_FORM_block4: if (uvalue > 0) { switch (Form) { + case DW_FORM_exprloc: case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break; case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break; @@ -342,6 +399,14 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { case DW_FORM_indirect: OS << "DW_FORM_indirect"; break; + + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + OS << format("0x%08x", (uint32_t)uvalue); + else + OS << format("0x%016" PRIx64, uvalue); + break; + default: OS << format("DW_FORM(0x%4.4x)", Form); break; @@ -404,6 +469,7 @@ const uint8_t *DWARFFormValue::BlockData() const { bool DWARFFormValue::isBlockForm(uint16_t form) { switch (form) { + case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h index 22ac011..c5b590d 100644 --- a/lib/DebugInfo/DWARFFormValue.h +++ b/lib/DebugInfo/DWARFFormValue.h @@ -52,7 +52,7 @@ public: bool extractValue(DataExtractor data, uint32_t *offset_ptr, const DWARFCompileUnit *cu); bool isInlinedCStr() const { - return Value.data != NULL && Value.data == (uint8_t*)Value.cstr; + return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr; } const uint8_t *BlockData() const; uint64_t getReference(const DWARFCompileUnit* cu) const; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index a744d0c..05987f2 100644 --- 
a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -29,7 +29,7 @@
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cmath>
 #include <cstring>
@@ -91,11 +91,11 @@ class GVMemoryBlock : public CallbackVH {
 public:
   /// \brief Returns the address the GlobalVariable should be written into.  The
   /// GVMemoryBlock object prefixes that.
-  static char *Create(const GlobalVariable *GV, const TargetData& TD) {
+  static char *Create(const GlobalVariable *GV, const DataLayout& TD) {
     Type *ElTy = GV->getType()->getElementType();
     size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
     void *RawMemory = ::operator new(
-      TargetData::RoundUpAlignment(sizeof(GVMemoryBlock),
+      DataLayout::RoundUpAlignment(sizeof(GVMemoryBlock),
                                    TD.getPreferredAlignment(GV))
       + GVSize);
     new(RawMemory) GVMemoryBlock(GV);
@@ -113,7 +113,7 @@ public:
 } // anonymous namespace
 
 char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
-  return GVMemoryBlock::Create(GV, *getTargetData());
+  return GVMemoryBlock::Create(GV, *getDataLayout());
 }
 
 bool ExecutionEngine::removeModule(Module *M) {
@@ -267,7 +267,7 @@ public:
 void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
                        const std::vector<std::string> &InputArgv) {
   clear();  // Free the old contents.
-  unsigned PtrSize = EE->getTargetData()->getPointerSize();
+  unsigned PtrSize = EE->getDataLayout()->getPointerSize();
   Array = new char[(InputArgv.size()+1)*PtrSize];
 
   DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
@@ -342,7 +342,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
 
 #ifndef NDEBUG
 /// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
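// (Editorial note, not part of the patch: the TargetData -> DataLayout change
// in this file is a mechanical rename; call sites simply switch accessors,
// e.g., a sketch with an assumed ExecutionEngine pointer EE:
//   unsigned PtrSize = EE->getDataLayout()->getPointerSize();
// with the size and layout queries otherwise behaving as before.)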
static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) { - unsigned PtrSize = EE->getTargetData()->getPointerSize(); + unsigned PtrSize = EE->getDataLayout()->getPointerSize(); for (unsigned i = 0; i < PtrSize; ++i) if (*(i + (uint8_t*)Loc)) return false; @@ -501,7 +501,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } - if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) { + if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 && + ExecutionEngine::MCJITCtor == 0) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } @@ -643,15 +644,17 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::PtrToInt: { GenericValue GV = getConstantValue(Op0); - uint32_t PtrWidth = TD->getPointerSizeInBits(); + uint32_t PtrWidth = TD->getTypeSizeInBits(Op0->getType()); + assert(PtrWidth <= 64 && "Bad pointer width"); GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); + uint32_t IntWidth = TD->getTypeSizeInBits(CE->getType()); + GV.IntVal = GV.IntVal.zextOrTrunc(IntWidth); return GV; } case Instruction::IntToPtr: { GenericValue GV = getConstantValue(Op0); - uint32_t PtrWidth = TD->getPointerSizeInBits(); - if (PtrWidth != GV.IntVal.getBitWidth()) - GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); + uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getType()); + GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue())); return GV; @@ -832,7 +835,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes) { assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); - uint8_t *Src = (uint8_t *)IntVal.getRawData(); + const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); if (sys::isLittleEndianHost()) { // Little-endian host - the source is ordered from LSB to MSB. Order the @@ -855,7 +858,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, GenericValue *Ptr, Type *Ty) { - const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty); + const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty); switch (Ty->getTypeID()) { case Type::IntegerTyID: @@ -881,7 +884,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, dbgs() << "Cannot store value of type " << *Ty << "!\n"; } - if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) + if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian()) // Host and target are different endian - reverse the stored bytes. 
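  // (Editorial worked example: storing the 32-bit value 0x11223344 from a
  // little-endian host for a big-endian target first writes the host bytes
  // 44 33 22 11, and the reversal below then leaves 11 22 33 44 in target
  // memory.)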
     std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
 }
 
@@ -917,7 +920,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
 
 void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
                                           GenericValue *Ptr,
                                           Type *Ty) {
-  const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty);
+  const unsigned LoadBytes = getDataLayout()->getTypeStoreSize(Ty);
 
   switch (Ty->getTypeID()) {
   case Type::IntegerTyID:
@@ -958,20 +961,20 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
 
   if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
     unsigned ElementSize =
-      getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
+      getDataLayout()->getTypeAllocSize(CP->getType()->getElementType());
     for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
       InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
     return;
   }
 
   if (isa<ConstantAggregateZero>(Init)) {
-    memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
+    memset(Addr, 0, (size_t)getDataLayout()->getTypeAllocSize(Init->getType()));
     return;
   }
 
   if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
     unsigned ElementSize =
-      getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
+      getDataLayout()->getTypeAllocSize(CPA->getType()->getElementType());
     for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
       InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
     return;
@@ -979,7 +982,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
 
   if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
     const StructLayout *SL =
-      getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
+      getDataLayout()->getStructLayout(cast<StructType>(CPS->getType()));
     for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
       InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
     return;
@@ -1126,7 +1129,7 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
     InitializeMemory(GV->getInitializer(), GA);
   Type *ElTy = GV->getType()->getElementType();
-  size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+  size_t GVSize = (size_t)getDataLayout()->getTypeAllocSize(ElTy);
   NumInitBytes += (unsigned)GVSize;
   ++NumGlobals;
 }
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 75e680a..1e790e7 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -239,7 +239,7 @@ void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn)
 }
 
 LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
-  return wrap(unwrap(EE)->getTargetData());
+  return wrap(unwrap(EE)->getDataLayout());
 }
 
 void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
index 7d67d0d..3483088 100644
--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -1,11 +1,6 @@
-
-include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-set(system_libs
-  ${system_libs}
-  jitprofiling
-  )
+include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
 
 add_llvm_library(LLVMIntelJITEvents
   IntelJITEventListener.cpp
+  jitprofiling.c
   )
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index c11c17e..4cb0270 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -22,12 +22,12 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/ValueHandle.h"
 #include "EventListenerCommon.h"
+#include "IntelJITEventsWrapper.h"
 
 using namespace llvm;
 using namespace llvm::jitprofiling;
@@ -37,13 +37,13 @@ namespace {
 
 class IntelJITEventListener : public JITEventListener {
   typedef DenseMap<void*, unsigned int> MethodIDMap;
 
-  IntelJITEventsWrapper& Wrapper;
+  OwningPtr<IntelJITEventsWrapper> Wrapper;
   MethodIDMap MethodIDs;
   FilenameCache Filenames;
 
 public:
-  IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
-  : Wrapper(libraryWrapper) {
+  IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
+      Wrapper.reset(libraryWrapper);
   }
 
   ~IntelJITEventListener() {
@@ -54,6 +54,10 @@ public:
                                      const EmittedFunctionDetails &Details);
 
   virtual void NotifyFreeingMachineCode(void *OldPtr);
+
+  virtual void NotifyObjectEmitted(const ObjectImage &Obj);
+
+  virtual void NotifyFreeingObject(const ObjectImage &Obj);
 };
 
 static LineNumberInfo LineStartToIntelJITFormat(
@@ -94,7 +98,7 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat(
 void IntelJITEventListener::NotifyFunctionEmitted(
     const Function &F, void *FnStart, size_t FnSize,
     const EmittedFunctionDetails &Details) {
-  iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+  iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
                                       F.getName().data(),
                                       reinterpret_cast<uint64_t>(FnStart),
                                       FnSize);
@@ -151,32 +155,36 @@ void IntelJITEventListener::NotifyFunctionEmitted(
       FunctionMessage.line_number_table = 0;
   }
 
-  Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
-                           &FunctionMessage);
+  Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+                            &FunctionMessage);
   MethodIDs[FnStart] = FunctionMessage.method_id;
 }
 
 void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
   MethodIDMap::iterator I = MethodIDs.find(FnStart);
   if (I != MethodIDs.end()) {
-    Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+    Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
     MethodIDs.erase(I);
   }
 }
 
+void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
+}
+
+void IntelJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) {
+}
+
 } // anonymous namespace.
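// (Editorial sketch, not part of the patch: a hypothetical registration of
// the listener created below, where EE stands for an existing
// ExecutionEngine instance:
//
//   JITEventListener *Listener =
//       JITEventListener::createIntelJITEventListener();
//   EE->RegisterJITEventListener(Listener);
// )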
namespace llvm {
 
 JITEventListener *JITEventListener::createIntelJITEventListener() {
-  static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
-      new IntelJITEventsWrapper);
-  return new IntelJITEventListener(*JITProfilingWrapper);
+  return new IntelJITEventListener(new IntelJITEventsWrapper);
 }
 
 // for testing
 JITEventListener *JITEventListener::createIntelJITEventListener(
                                       IntelJITEventsWrapper* TestImpl) {
-  return new IntelJITEventListener(*TestImpl);
+  return new IntelJITEventListener(TestImpl);
 }
 
 } // namespace llvm
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
new file mode 100644
index 0000000..7ab08e1
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -0,0 +1,102 @@
+//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a wrapper for the Intel JIT Events API. It allows for the
+// implementation of the jitprofiling library to be swapped with an alternative
+// implementation (for testing). To include this file, you must have the
+// jitprofiling.h header available; it is available in Intel(R) VTune(TM)
+// Amplifier XE 2011.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INTEL_JIT_EVENTS_WRAPPER_H
+#define INTEL_JIT_EVENTS_WRAPPER_H
+
+#include "jitprofiling.h"
+
+namespace llvm {
+
+class IntelJITEventsWrapper {
+  // Function pointer types for testing implementation of Intel jitprofiling
+  // library
+  typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*);
+  typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx );
+  typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void);
+  typedef void (*FinalizeThreadPtr)(void);
+  typedef void (*FinalizeProcessPtr)(void);
+  typedef unsigned int (*GetNewMethodIDPtr)(void);
+
+  NotifyEventPtr NotifyEventFunc;
+  RegisterCallbackExPtr RegisterCallbackExFunc;
+  IsProfilingActivePtr IsProfilingActiveFunc;
+  FinalizeThreadPtr FinalizeThreadFunc;
+  FinalizeProcessPtr FinalizeProcessFunc;
+  GetNewMethodIDPtr GetNewMethodIDFunc;
+
+public:
+  bool isAmplifierRunning() {
+    return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON;
+  }
+
+  IntelJITEventsWrapper()
+  : NotifyEventFunc(::iJIT_NotifyEvent),
+    RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
+    IsProfilingActiveFunc(::iJIT_IsProfilingActive),
+    FinalizeThreadFunc(::FinalizeThread),
+    FinalizeProcessFunc(::FinalizeProcess),
+    GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
+  }
+
+  IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl,
+                        RegisterCallbackExPtr RegisterCallbackExImpl,
+                        IsProfilingActivePtr IsProfilingActiveImpl,
+                        FinalizeThreadPtr FinalizeThreadImpl,
+                        FinalizeProcessPtr FinalizeProcessImpl,
+                        GetNewMethodIDPtr GetNewMethodIDImpl)
+  : NotifyEventFunc(NotifyEventImpl),
+    RegisterCallbackExFunc(RegisterCallbackExImpl),
+    IsProfilingActiveFunc(IsProfilingActiveImpl),
+    FinalizeThreadFunc(FinalizeThreadImpl),
+    FinalizeProcessFunc(FinalizeProcessImpl),
+    GetNewMethodIDFunc(GetNewMethodIDImpl) {
+  }
+
+  // Sends an event announcing that a function has been emitted
+  //   return values are event-specific.  See Intel documentation for details.
+ int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) { + if (!NotifyEventFunc) + return -1; + return NotifyEventFunc(EventType, EventSpecificData); + } + + // Registers a callback function to receive notice of profiling state changes + void iJIT_RegisterCallbackEx(void *UserData, + iJIT_ModeChangedEx NewModeCallBackFuncEx) { + if (RegisterCallbackExFunc) + RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx); + } + + // Returns the current profiler mode + iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) { + if (!IsProfilingActiveFunc) + return iJIT_NOTHING_RUNNING; + return IsProfilingActiveFunc(); + } + + // Generates a locally unique method ID for use in code registration + unsigned int iJIT_GetNewMethodID(void) { + if (!GetNewMethodIDFunc) + return -1; + return GetNewMethodIDFunc(); + } +}; + +} //namespace llvm + +#endif //INTEL_JIT_EVENTS_WRAPPER_H diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile index ba75ac6..dcf3126 100644 --- a/lib/ExecutionEngine/IntelJITEvents/Makefile +++ b/lib/ExecutionEngine/IntelJITEvents/Makefile @@ -11,7 +11,8 @@ LIBRARYNAME = LLVMIntelJITEvents include $(LEVEL)/Makefile.config -SOURCES := IntelJITEventListener.cpp -CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +SOURCES := IntelJITEventListener.cpp \ + jitprofiling.c +CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h new file mode 100644 index 0000000..1f029fb --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h @@ -0,0 +1,454 @@ +/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API internal config. + * + * NOTE: This file comes in a style different from the rest of LLVM + * source base since this is a piece of code shared from Intel(R) + * products. Please do not reformat / re-style this code to make + * subsequent merges and contributions from the original source base eaiser. 
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef _ITTNOTIFY_CONFIG_H_
+#define _ITTNOTIFY_CONFIG_H_
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+#  define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+#  define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+#  define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+#  if defined WIN32 || defined _WIN32
+#    define ITT_OS ITT_OS_WIN
+#  elif defined( __APPLE__ ) && defined( __MACH__ )
+#    define ITT_OS ITT_OS_MAC
+#  else
+#    define ITT_OS ITT_OS_LINUX
+#  endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+#  define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+#  define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM
+#  if ITT_OS==ITT_OS_WIN
+#    define ITT_PLATFORM ITT_PLATFORM_WIN
+#  else
+#    define ITT_PLATFORM ITT_PLATFORM_POSIX
+#  endif /* _WIN32 */
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    define CDECL __cdecl
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+#      define CDECL /* not actual on x86_64 platform */
+#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#      define CDECL __attribute__ ((cdecl))
+#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    define STDCALL __stdcall
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+#      define STDCALL /* not supported on x86_64 platform */
+#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#      define STDCALL __attribute__ ((stdcall))
+#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI    CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL    CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE           __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE           static
+#else  /* __STRICT_ANSI__ */
+#define ITT_INLINE           static inline
+#endif /* __STRICT_ANSI__ */
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifndef ITT_ARCH_IA32
+#  define ITT_ARCH_IA32  1
+#endif /* ITT_ARCH_IA32 */
+
+#ifndef ITT_ARCH_IA32E
+#  define ITT_ARCH_IA32E 2
+#endif /* ITT_ARCH_IA32E */
+
+#ifndef ITT_ARCH_IA64
+#  define ITT_ARCH_IA64  3
+#endif /* ITT_ARCH_IA64 */
+
+#ifndef ITT_ARCH
+#  if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+#    define ITT_ARCH ITT_ARCH_IA32E
+#  elif defined _M_IA64 || defined __ia64
+#    define ITT_ARCH ITT_ARCH_IA64
+#  else
+#    define ITT_ARCH ITT_ARCH_IA32
+#  endif
+#endif
+
+#ifdef __cplusplus
+#  define ITT_EXTERN_C extern "C"
+#else
+#  define ITT_EXTERN_C /* nothing */
+#endif /* __cplusplus */
+
+#define ITT_TO_STR_AUX(x) #x
+#define ITT_TO_STR(x)     ITT_TO_STR_AUX(x)
+
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+  static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+  __itt_build_check_##suffix[0] = 0; \
+} while(0)
+#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
+#define ITT_BUILD_ASSERT(expr)          _ITT_BUILD_ASSERT((expr), __LINE__)
+
+#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
+
+/* Replace with snapshot date YYYYMMDD for promotion build. */
+#define API_VERSION_BUILD    20111111
+
+#ifndef API_VERSION_NUM
+#define API_VERSION_NUM 0.0.0
+#endif /* API_VERSION_NUM */
+
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+                                " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+
+/* OS communication functions */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+typedef HMODULE           lib_t;
+typedef DWORD             TIDT;
+typedef CRITICAL_SECTION  mutex_t;
+#define MUTEX_INITIALIZER { 0 }
+#define strong_alias(name, aliasname) /* empty for Windows */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <dlfcn.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
+#endif /* _GNU_SOURCE */
+#include <pthread.h>
+typedef void*             lib_t;
+typedef pthread_t         TIDT;
+typedef pthread_mutex_t   mutex_t;
+#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#define _strong_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
+#define __itt_mutex_init(mutex)   InitializeCriticalSection(mutex)
+#define __itt_mutex_lock(mutex)   EnterCriticalSection(mutex)
+#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_load_lib(name)      LoadLibraryA(name)
+#define __itt_unload_lib(handle)  FreeLibrary(handle)
+#define __itt_system_error()      (int)GetLastError()
+#define __itt_fstrcmp(s1, s2)     lstrcmpA(s1, s2)
+#define __itt_fstrlen(s)          lstrlenA(s)
+#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l)
+#define __itt_fstrdup(s)          _strdup(s)
+#define __itt_thread_id()         GetCurrentThreadId()
+#define __itt_thread_yield()      SwitchToThread()
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+  return InterlockedIncrement(ptr);
+}
+#endif /* ITT_SIMPLE_INIT */
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#define __itt_get_proc(lib, name)
dlsym(lib, name) +#define __itt_mutex_init(mutex) {\ + pthread_mutexattr_t mutex_attr; \ + int error_code = pthread_mutexattr_init(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ + error_code); \ + error_code = pthread_mutexattr_settype(&mutex_attr, \ + PTHREAD_MUTEX_RECURSIVE); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ + error_code); \ + error_code = pthread_mutex_init(mutex, &mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutex_init", \ + error_code); \ + error_code = pthread_mutexattr_destroy(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ + error_code); \ +} +#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) +#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) +#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) +#define __itt_unload_lib(handle) dlclose(handle) +#define __itt_system_error() errno +#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) +#define __itt_fstrlen(s) strlen(s) +#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l) +#define __itt_fstrdup(s) strdup(s) +#define __itt_thread_id() pthread_self() +#define __itt_thread_yield() sched_yield() +#if ITT_ARCH==ITT_ARCH_IA64 +#ifdef __INTEL_COMPILER +#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) +#else /* __INTEL_COMPILER */ +/* TODO: Add Support for not Intel compilers for IA64 */ +#endif /* __INTEL_COMPILER */ +#else /* ITT_ARCH!=ITT_ARCH_IA64 */ +ITT_INLINE long +__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) +{ + long result; + __asm__ __volatile__("lock\nxadd %0,%1" + : "=r"(result),"=m"(*(long*)ptr) + : "0"(addend), "m"(*(long*)ptr) + : "memory"); + return result; +} +#endif /* ITT_ARCH==ITT_ARCH_IA64 */ +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return __TBB_machine_fetchadd4(ptr, 1) + 1L; +} +#endif /* ITT_SIMPLE_INIT */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +typedef enum { + __itt_collection_normal = 0, + __itt_collection_paused = 1 +} __itt_collection_state; + +typedef enum { + __itt_thread_normal = 0, + __itt_thread_ignored = 1 +} __itt_thread_state; + +#pragma pack(push, 8) + +typedef struct ___itt_thread_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. 
*/ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + TIDT tid; + __itt_thread_state state; /*!< Thread state (paused or normal) */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_thread_info* next; +} __itt_thread_info; + +#include "ittnotify_types.h" /* For __itt_group_id definition */ + +typedef struct ___itt_api_info_20101001 +{ + const char* name; + void** func_ptr; + void* init_func; + __itt_group_id group; +} __itt_api_info_20101001; + +typedef struct ___itt_api_info +{ + const char* name; + void** func_ptr; + void* init_func; + void* null_func; + __itt_group_id group; +} __itt_api_info; + +struct ___itt_domain; +struct ___itt_string_handle; + +typedef struct ___itt_global +{ + unsigned char magic[8]; + unsigned long version_major; + unsigned long version_minor; + unsigned long version_build; + volatile long api_initialized; + volatile long mutex_initialized; + volatile long atomic_counter; + mutex_t mutex; + lib_t lib; + void* error_handler; + const char** dll_path_ptr; + __itt_api_info* api_list_ptr; + struct ___itt_global* next; + /* Joinable structures below */ + __itt_thread_info* thread_list; + struct ___itt_domain* domain_list; + struct ___itt_string_handle* string_list; + __itt_collection_state state; +} __itt_global; + +#pragma pack(pop) + +#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = NULL; \ + h->nameW = n ? _wcsdup(n) : NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = n ? __itt_fstrdup(n) : NULL; \ + h->nameW = NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = NULL; \ + h->nameW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = name ? __itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = NULL; \ + h->strW = name ? 
_wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = name ? __itt_fstrdup(name) : NULL; \ + h->strW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h new file mode 100644 index 0000000..5df752f --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h @@ -0,0 +1,70 @@ +/*===-- ittnotify_types.h - JIT Profiling API internal types--------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * NOTE: This file comes in a style different from the rest of LLVM + * source base since this is a piece of code shared from Intel(R) + * products. Please do not reformat / re-style this code to make + * subsequent merges and contributions from the original source base easier. + * + *===----------------------------------------------------------------------===*/ +#ifndef _ITTNOTIFY_TYPES_H_ +#define _ITTNOTIFY_TYPES_H_ + +typedef enum ___itt_group_id +{ + __itt_group_none = 0, + __itt_group_legacy = 1<<0, + __itt_group_control = 1<<1, + __itt_group_thread = 1<<2, + __itt_group_mark = 1<<3, + __itt_group_sync = 1<<4, + __itt_group_fsync = 1<<5, + __itt_group_jit = 1<<6, + __itt_group_model = 1<<7, + __itt_group_splitter_min = 1<<7, + __itt_group_counter = 1<<8, + __itt_group_frame = 1<<9, + __itt_group_stitch = 1<<10, + __itt_group_heap = 1<<11, + __itt_group_splitter_max = 1<<12, + __itt_group_structure = 1<<12, + __itt_group_suppress = 1<<13, + __itt_group_all = -1 +} __itt_group_id; + +#pragma pack(push, 8) + +typedef struct ___itt_group_list +{ + __itt_group_id id; + const char* name; +} __itt_group_list; + +#pragma pack(pop) + +#define ITT_GROUP_LIST(varname) \ + static __itt_group_list varname[] = { \ + { __itt_group_all, "all" }, \ + { __itt_group_control, "control" }, \ + { __itt_group_thread, "thread" }, \ + { __itt_group_mark, "mark" }, \ + { __itt_group_sync, "sync" }, \ + { __itt_group_fsync, "fsync" }, \ + { __itt_group_jit, "jit" }, \ + { __itt_group_model, "model" }, \ + { __itt_group_counter, "counter" }, \ + { __itt_group_frame, "frame" }, \ + { __itt_group_stitch, "stitch" }, \ + { __itt_group_heap, "heap" }, \ + { __itt_group_structure, "structure" }, \ + { __itt_group_suppress, "suppress" }, \ + { __itt_group_none, NULL } \ + } + +#endif /* _ITTNOTIFY_TYPES_H_ */ diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c new file mode 100644 index 0000000..7b507de --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c @@ -0,0 +1,481 @@ +/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details.
+ * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API implementation. + * + * NOTE: This file comes in a style different from the rest of LLVM + * source base since this is a piece of code shared from Intel(R) + * products. Please do not reformat / re-style this code to make + * subsequent merges and contributions from the original source base easier. + * + *===----------------------------------------------------------------------===*/ +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <windows.h> +#pragma optimize("", off) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <pthread.h> +#include <dlfcn.h> +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <malloc.h> +#include <stdlib.h> + +#include "jitprofiling.h" + +static const char rcsid[] = "\n@(#) $Revision: 243501 $\n"; + +#define DLL_ENVIRONMENT_VAR "VS_PROFILER" + +#ifndef NEW_DLL_ENVIRONMENT_VAR +#if ITT_ARCH==ITT_ARCH_IA32 +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32" +#else +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64" +#endif +#endif /* NEW_DLL_ENVIRONMENT_VAR */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define DEFAULT_DLLNAME "JitPI.dll" +HINSTANCE m_libHandle = NULL; +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define DEFAULT_DLLNAME "libJitPI.so" +void* m_libHandle = NULL; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/* default location of JIT profiling agent on Android */ +#define ANDROID_JIT_AGENT_PATH "/data/intel/libittnotify.so" + +/* the function pointers */ +typedef unsigned int(*TPInitialize)(void); +static TPInitialize FUNC_Initialize=NULL; + +typedef unsigned int(*TPNotify)(unsigned int, void*); +static TPNotify FUNC_NotifyEvent=NULL; + +static iJIT_IsProfilingActiveFlags executionMode = iJIT_NOTHING_RUNNING; + +/* end collector dll part. */ + +/* loadiJIT_Funcs() : this function is called just in the beginning + * and is responsible for loading the functions from BistroJavaCollector.dll + * result: + * on success: the functions load, iJIT_DLL_is_missing=0, return value = 1 + * on failure: the functions are NULL, iJIT_DLL_is_missing=1, return value = 0 + */ +static int loadiJIT_Funcs(void); + +/* global representing whether the BistroJavaCollector can't be loaded */ +static int iJIT_DLL_is_missing = 0; + +/* Virtual stack - the struct is used as a virtual stack for each thread. + * Every thread initializes with a stack of size INIT_TOP_STACK. + * Every method entry decreases from the current stack point, + * and when a thread stack reaches its top of stack (return from the global + * function), the top of stack and the current stack increase. Notice that + * when returning from a function the stack pointer is the address of + * the function return. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static DWORD threadLocalStorageHandle = 0; +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static pthread_key_t threadLocalStorageHandle = (pthread_key_t)0; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#define INIT_TOP_Stack 10000 + +typedef struct +{ + unsigned int TopStack; + unsigned int CurrentStack; +} ThreadStack, *pThreadStack; + +/* end of virtual stack. */ + +/* + * The function for reporting virtual-machine related events to VTune. + * Note: when reporting iJVM_EVENT_TYPE_ENTER_NIDS, there is no need to fill + * in the stack_id field in the iJIT_Method_NIDS structure, as VTune fills it.
+ * The return value in iJVM_EVENT_TYPE_ENTER_NIDS && + * iJVM_EVENT_TYPE_LEAVE_NIDS events will be 0 in case of failure. + * in iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event + * it will be -1 if EventSpecificData == 0 otherwise it will be 0. +*/ + +ITT_EXTERN_C int JITAPI +iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData) +{ + int ReturnValue; + + /* + * This section is for debugging outside of VTune. + * It creates the environment variables that indicates call graph mode. + * If running outside of VTune remove the remark. + * + * + * static int firstTime = 1; + * char DoCallGraph[12] = "DoCallGraph"; + * if (firstTime) + * { + * firstTime = 0; + * SetEnvironmentVariable( "BISTRO_COLLECTORS_DO_CALLGRAPH", DoCallGraph); + * } + * + * end of section. + */ + + /* initialization part - the functions have not been loaded yet. This part + * will load the functions, and check if we are in Call Graph mode. + * (for special treatment). + */ + if (!FUNC_NotifyEvent) + { + if (iJIT_DLL_is_missing) + return 0; + + /* load the Function from the DLL */ + if (!loadiJIT_Funcs()) + return 0; + + /* Call Graph initialization. */ + } + + /* If the event is method entry/exit, check that in the current mode + * VTune is allowed to receive it + */ + if ((event_type == iJVM_EVENT_TYPE_ENTER_NIDS || + event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) && + (executionMode != iJIT_CALLGRAPH_ON)) + { + return 0; + } + /* This section is performed when method enter event occurs. + * It updates the virtual stack, or creates it if this is the first + * method entry in the thread. The stack pointer is decreased. + */ + if (event_type == iJVM_EVENT_TYPE_ENTER_NIDS) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 ) + return 0; + + if (!threadStack) + { + /* initialize the stack. */ + threadStack = (pThreadStack) calloc (sizeof(ThreadStack), 1); + threadStack->TopStack = INIT_TOP_Stack; + threadStack->CurrentStack = INIT_TOP_Stack; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsSetValue(threadLocalStorageHandle,(void*)threadStack); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_setspecific(threadLocalStorageHandle,(void*)threadStack); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + /* decrease the stack. */ + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + (threadStack->CurrentStack)--; + } + + /* This section is performed when method leave event occurs + * It updates the virtual stack. + * Increases the stack pointer. + * If the stack pointer reached the top (left the global function) + * increase the pointer and the top pointer. 
+ */ + if (event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 ) + return 0; + + if (!threadStack) + { + /* Error: first report in this thread is method exit */ + exit (1); + } + + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + ++(threadStack->CurrentStack) + 1; + + if (((piJIT_Method_NIDS) EventSpecificData)->stack_id + > threadStack->TopStack) + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + (unsigned int)-1; + } + + if (event_type == iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED) + { + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_Load) EventSpecificData)->method_id <= 999 ) + return 0; + } + + ReturnValue = (int)FUNC_NotifyEvent(event_type, EventSpecificData); + + return ReturnValue; +} + +/* The new mode call back routine */ +ITT_EXTERN_C void JITAPI +iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx + NewModeCallBackFuncEx) +{ + /* is it already missing... or the load of functions from the DLL failed */ + if (iJIT_DLL_is_missing || !loadiJIT_Funcs()) + { + /* then do not bother with notifications */ + NewModeCallBackFuncEx(userdata, iJIT_NO_NOTIFICATIONS); + /* Error: could not load JIT functions. */ + return; + } + /* nothing to do with the callback */ +} + +/* + * This function allows the user to query in which mode, if at all, + *VTune is running + */ +ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive() +{ + if (!iJIT_DLL_is_missing) + { + loadiJIT_Funcs(); + } + + return executionMode; +} + +/* this function loads the collector dll (BistroJavaCollector) + * and the relevant functions. + * on success: all functions load, iJIT_DLL_is_missing = 0, return value = 1 + * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0 + */ +static int loadiJIT_Funcs() +{ + static int bDllWasLoaded = 0; + char *dllName = (char*)rcsid; /* !! Just to avoid unused code elimination */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN + DWORD dNameLength = 0; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + if(bDllWasLoaded) + { + /* dll was already loaded, no need to do it for the second time */ + return 1; + } + + /* Assumes that the DLL will not be found */ + iJIT_DLL_is_missing = 1; + FUNC_NotifyEvent = NULL; + + if (m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FreeLibrary(m_libHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlclose(m_libHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = NULL; + } + + /* Try to get the dll name from the environment */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN + dNameLength = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, NULL, 0); + if (dNameLength) + { + DWORD envret = 0; + dllName = (char*)malloc(sizeof(char) * (dNameLength + 1)); + envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, + dllName, dNameLength); + if (envret) + { + /* Try to load the dll from the PATH... 
*/ + m_libHandle = LoadLibraryExA(dllName, + NULL, LOAD_WITH_ALTERED_SEARCH_PATH); + } + free(dllName); + } else { + /* Try to use old VS_PROFILER variable */ + dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0); + if (dNameLength) + { + DWORD envret = 0; + dllName = (char*)malloc(sizeof(char) * (dNameLength + 1)); + envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, + dllName, dNameLength); + if (envret) + { + /* Try to load the dll from the PATH... */ + m_libHandle = LoadLibraryA(dllName); + } + free(dllName); + } + } +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dllName = getenv(NEW_DLL_ENVIRONMENT_VAR); + if (!dllName) + dllName = getenv(DLL_ENVIRONMENT_VAR); +#ifdef ANDROID + if (!dllName) + dllName = ANDROID_JIT_AGENT_PATH; +#endif + if (dllName) + { + /* Try to load the dll from the PATH... */ + m_libHandle = dlopen(dllName, RTLD_LAZY); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + if (!m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + m_libHandle = LoadLibraryA(DEFAULT_DLLNAME); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + /* if the dll wasn't loaded - exit. */ + if (!m_libHandle) + { + iJIT_DLL_is_missing = 1; /* don't try to initialize + * JIT agent the second time + */ + return 0; + } + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent"); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent"); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (!FUNC_NotifyEvent) + { + FUNC_Initialize = NULL; + return 0; + } + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize"); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize"); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (!FUNC_Initialize) + { + FUNC_NotifyEvent = NULL; + return 0; + } + + executionMode = (iJIT_IsProfilingActiveFlags)FUNC_Initialize(); + + bDllWasLoaded = 1; + iJIT_DLL_is_missing = 0; /* DLL is ok. */ + + /* + * Call Graph mode: init the thread local storage + * (need to store the virtual stack there). 
+ */ + if ( executionMode == iJIT_CALLGRAPH_ON ) + { + /* Allocate a thread local storage slot for the thread "stack" */ + if (!threadLocalStorageHandle) +#if ITT_PLATFORM==ITT_PLATFORM_WIN + threadLocalStorageHandle = TlsAlloc(); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_key_create(&threadLocalStorageHandle, NULL); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + return 1; +} + +/* + * This function should be called by the user whenever a thread ends, + * to free the thread "virtual stack" storage + */ +ITT_EXTERN_C void JITAPI FinalizeThread() +{ + if (threadLocalStorageHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (threadStack) + { + free (threadStack); + threadStack = NULL; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsSetValue (threadLocalStorageHandle, threadStack); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_setspecific(threadLocalStorageHandle, threadStack); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + } +} + +/* + * This function should be called by the user when the process ends, + * to free the local storage index +*/ +ITT_EXTERN_C void JITAPI FinalizeProcess() +{ + if (m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FreeLibrary(m_libHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlclose(m_libHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = NULL; + } + + if (threadLocalStorageHandle) +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsFree (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_key_delete(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +} + +/* + * This function should be called by the user for any method once. + * The function will return a unique method ID, the user should maintain + * the ID for each method + */ +ITT_EXTERN_C unsigned int JITAPI iJIT_GetNewMethodID() +{ + static unsigned int methodID = 0x100000; + + if (methodID == 0) + return 0; /* ERROR : this is not a valid value */ + + return methodID++; +} diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h new file mode 100644 index 0000000..f08e287 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h @@ -0,0 +1,259 @@ +/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API declaration. + * + * NOTE: This file comes in a style different from the rest of LLVM + * source base since this is a piece of code shared from Intel(R) + * products. Please do not reformat / re-style this code to make + * subsequent merges and contributions from the original source base easier.
+ * + *===----------------------------------------------------------------------===*/ +#ifndef __JITPROFILING_H__ +#define __JITPROFILING_H__ + +/* + * Various constants used by functions + */ + +/* event notification */ +typedef enum iJIT_jvm_event +{ + + /* shutdown */ + + /* + * Program exiting EventSpecificData NA + */ + iJVM_EVENT_TYPE_SHUTDOWN = 2, + + /* JIT profiling */ + + /* + * issued after method code jitted into memory but before code is executed + * EventSpecificData is an iJIT_Method_Load + */ + iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13, + + /* issued before unload. Method code will no longer be executed, but code + * and info are still in memory. The VTune profiler may capture method + * code only at this point EventSpecificData is iJIT_Method_Id + */ + iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + + /* Method Profiling */ + + /* method name, Id and stack is supplied + * issued when a method is about to be entered EventSpecificData is + * iJIT_Method_NIDS + */ + iJVM_EVENT_TYPE_ENTER_NIDS = 19, + + /* method name, Id and stack is supplied + * issued when a method is about to be left EventSpecificData is + * iJIT_Method_NIDS + */ + iJVM_EVENT_TYPE_LEAVE_NIDS +} iJIT_JVM_EVENT; + +typedef enum _iJIT_ModeFlags +{ + /* No need to Notify VTune, since VTune is not running */ + iJIT_NO_NOTIFICATIONS = 0x0000, + + /* when turned on the jit must call + * iJIT_NotifyEvent + * ( + * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + * ) + * for all the methods already jitted + */ + iJIT_BE_NOTIFY_ON_LOAD = 0x0001, + + /* when turned on the jit must call + * iJIT_NotifyEvent + * ( + * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED, + * ) for all the methods that are unloaded + */ + iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002, + + /* when turned on the jit must instrument all + * the currently jitted code with calls on + * method entries + */ + iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004, + + /* when turned on the jit must instrument all + * the currently jitted code with calls + * on method exit + */ + iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008 + +} iJIT_ModeFlags; + + + /* Flags used by iJIT_IsProfilingActive() */ +typedef enum _iJIT_IsProfilingActiveFlags +{ + /* No profiler is running. Currently not used */ + iJIT_NOTHING_RUNNING = 0x0000, + + /* Sampling is running.
This is the default value + * returned by iJIT_IsProfilingActive() + */ + iJIT_SAMPLING_ON = 0x0001, + + /* Call Graph is running */ + iJIT_CALLGRAPH_ON = 0x0002 + +} iJIT_IsProfilingActiveFlags; + +/* Enumerator for the environment of methods */ +typedef enum _iJDEnvironmentType +{ + iJDE_JittingAPI = 2 +} iJDEnvironmentType; + +/********************************** + * Data structures for the events * + **********************************/ + +/* structure for the events: + * iJVM_EVENT_TYPE_METHOD_UNLOAD_START + */ + +typedef struct _iJIT_Method_Id +{ + /* Id of the method (same as the one passed in + * the iJIT_Method_Load struct + */ + unsigned int method_id; + +} *piJIT_Method_Id, iJIT_Method_Id; + + +/* structure for the events: + * iJVM_EVENT_TYPE_ENTER_NIDS, + * iJVM_EVENT_TYPE_LEAVE_NIDS, + * iJVM_EVENT_TYPE_EXCEPTION_OCCURRED_NIDS + */ + +typedef struct _iJIT_Method_NIDS +{ + /* unique method ID */ + unsigned int method_id; + + /* NOTE: no need to fill this field, it's filled by VTune */ + unsigned int stack_id; + + /* method name (just the method, without the class) */ + char* method_name; +} *piJIT_Method_NIDS, iJIT_Method_NIDS; + +/* structures for the events: + * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED + */ + +typedef struct _LineNumberInfo +{ + /* x86 Offset from the beginning of the method */ + unsigned int Offset; + + /* source line number from the beginning of the source file */ + unsigned int LineNumber; + +} *pLineNumberInfo, LineNumberInfo; + +typedef struct _iJIT_Method_Load +{ + /* unique method ID - can be any unique value, (except 0 - 999) */ + unsigned int method_id; + + /* method name (can be with or without the class and signature, in any case + * the class name will be added to it) + */ + char* method_name; + + /* virtual address of that method - This determines the method range for the + * iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events + */ + void* method_load_address; + + /* Size in memory - Must be exact */ + unsigned int method_size; + + /* Line Table size in number of entries - Zero if none */ + unsigned int line_number_size; + + /* Pointer to the beginning of the line numbers info array */ + pLineNumberInfo line_number_table; + + /* unique class ID */ + unsigned int class_id; + + /* class file name */ + char* class_file_name; + + /* source file name */ + char* source_file_name; + + /* bits supplied by the user for saving in the JIT file */ + void* user_data; + + /* the size of the user data buffer */ + unsigned int user_data_size; + + /* NOTE: no need to fill this field, it's filled by VTune */ + iJDEnvironmentType env; + +} *piJIT_Method_Load, iJIT_Method_Load; + +/* API Functions */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef CDECL +# if defined WIN32 || defined _WIN32 +# define CDECL __cdecl +# else /* defined WIN32 || defined _WIN32 */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define CDECL /* not actual on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define CDECL __attribute__ ((cdecl)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* defined WIN32 || defined _WIN32 */ +#endif /* CDECL */ + +#define JITAPI CDECL + +/* called when the settings are changed with new settings */ +typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags); + +int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData); + +/* The new mode call back routine */ +void JITAPI iJIT_RegisterCallbackEx(void *userdata, + iJIT_ModeChangedEx NewModeCallBackFuncEx); +
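To make the API above concrete, here is a minimal usage sketch. It is illustrative only and not part of this patch: it assumes the agent loaded successfully, and the names reportJittedMethod, code, codeSize, and "jitted_fn" are hypothetical.

/* Illustrative sketch only -- not part of this patch. Reports one freshly
 * emitted function to the profiling agent; `code`/`codeSize` are assumed
 * to come from the JIT. */
#include "jitprofiling.h"
#include <string.h>

static void reportJittedMethod(void *code, unsigned codeSize)
{
    iJIT_Method_Load ml;
    if (iJIT_IsProfilingActive() == iJIT_NOTHING_RUNNING)
        return;                            /* no profiling agent attached */
    memset(&ml, 0, sizeof(ml));
    ml.method_id = iJIT_GetNewMethodID();  /* IDs 0 - 999 are reserved */
    ml.method_name = (char*)"jitted_fn";   /* hypothetical name */
    ml.method_load_address = code;
    ml.method_size = codeSize;             /* must be exact */
    iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
}

Note that the implementation in jitprofiling.c silently rejects method IDs at or below 999, which is why the sketch draws IDs from iJIT_GetNewMethodID() rather than choosing its own.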
+iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void); + +void JITAPI FinalizeThread(void); + +void JITAPI FinalizeProcess(void); + +unsigned int JITAPI iJIT_GetNewMethodID(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __JITPROFILING_H__ */ diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 7a206eb..e16e2d1 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -25,7 +25,7 @@ #include "llvm/Config/config.h" // Detect libffi #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include <csignal> @@ -180,7 +180,7 @@ static void *ffiValueFor(Type *Ty, const GenericValue &AV, static bool ffiInvoke(RawFunc Fn, Function *F, const std::vector<GenericValue> &ArgVals, - const TargetData *TD, GenericValue &Result) { + const DataLayout *TD, GenericValue &Result) { ffi_cif cif; FunctionType *FTy = F->getFunctionType(); const unsigned NumArgs = F->arg_size(); @@ -276,7 +276,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, FunctionsLock->release(); GenericValue Result; - if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result)) + if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getDataLayout(), Result)) return Result; #endif // USE_LIBFFI @@ -376,7 +376,7 @@ GenericValue lle_X_sprintf(FunctionType *FT, case 'x': case 'X': if (HowLong >= 1) { if (HowLong == 1 && - TheInterpreter->getTargetData()->getPointerSizeInBits() == 64 && + TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 && sizeof(long) < sizeof(int64_t)) { // Make sure we use %lld with a 64 bit argument because we might be // compiling LLI on a 32 bit compiler. diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index 43e3453..55152db 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -48,7 +48,7 @@ Interpreter::Interpreter(Module *M) : ExecutionEngine(M), TD(M) { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); - setTargetData(&TD); + setDataLayout(&TD); // Initialize the "backend" initializeExecutionEngine(); initializeExternalFunctions(); diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 28c5775..72c42c1 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -17,7 +17,7 @@ #include "llvm/Function.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -82,7 +82,7 @@ struct ExecutionContext { // class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> { GenericValue ExitValue; // The return value of the called function - TargetData TD; + DataLayout TD; IntrinsicLowering *IL; // The runtime stack of executing code.
The top of the stack is the current diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 97995ad..1ad3382 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -24,7 +24,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetJITInfo.h" #include "llvm/Support/Dwarf.h" @@ -272,7 +272,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, : ExecutionEngine(M), TM(tm), TJI(tji), JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()), AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { - setTargetData(TM.getTargetData()); + setDataLayout(TM.getDataLayout()); jitstate = new JITState(M); @@ -285,7 +285,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Add target data MutexGuard locked(lock); FunctionPassManager &PM = jitstate->getPM(locked); - PM.add(new TargetData(*TM.getTargetData())); + PM.add(new DataLayout(*TM.getDataLayout())); // Turn the machine code intermediate representation into bytes in memory that // may be executed. @@ -339,7 +339,7 @@ void JIT::addModule(Module *M) { jitstate = new JITState(M); FunctionPassManager &PM = jitstate->getPM(locked); - PM.add(new TargetData(*TM.getTargetData())); + PM.add(new DataLayout(*TM.getDataLayout())); // Turn the machine code intermediate representation into bytes in memory // that may be executed. @@ -370,7 +370,7 @@ bool JIT::removeModule(Module *M) { jitstate = new JITState(Modules[0]); FunctionPassManager &PM = jitstate->getPM(locked); - PM.add(new TargetData(*TM.getTargetData())); + PM.add(new DataLayout(*TM.getDataLayout())); // Turn the machine code intermediate representation into bytes in memory // that may be executed. @@ -815,8 +815,8 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { // through the memory manager which puts them near the code but not in the // same buffer. 
Type *GlobalType = GV->getType()->getElementType(); - size_t S = getTargetData()->getTypeAllocSize(GlobalType); - size_t A = getTargetData()->getPreferredAlignment(GV); + size_t S = getDataLayout()->getTypeAllocSize(GlobalType); + size_t A = getDataLayout()->getPreferredAlignment(GV); if (GV->isThreadLocal()) { MutexGuard locked(lock); Ptr = TJI.allocateThreadLocalMemory(S); diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 42a136e..19c1979 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -42,7 +42,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, assert(MMI && "MachineModuleInfo not registered!"); const TargetMachine& TM = F.getTarget(); - TD = TM.getTargetData(); + TD = TM.getDataLayout(); stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection(); RI = TM.getRegisterInfo(); MAI = TM.getMCAsmInfo(); diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h index 8dc99ab..9cdbeac 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -23,12 +23,12 @@ class MachineFunction; class MachineModuleInfo; class MachineMove; class MCAsmInfo; -class TargetData; +class DataLayout; class TargetMachine; class TargetRegisterInfo; class JITDwarfEmitter { - const TargetData* TD; + const DataLayout* TD; JITCodeEmitter* JCE; const TargetRegisterInfo* RI; const MCAsmInfo *MAI; diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index ff3a9dc..ecafda7 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -30,7 +30,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" #include "llvm/Target/TargetMachine.h" @@ -384,11 +384,6 @@ namespace { delete MemMgr; } - /// classof - Methods for support type inquiry through isa, cast, and - /// dyn_cast: - /// - static inline bool classof(const MachineCodeEmitter*) { return true; } - JITResolver &getJITResolver() { return Resolver; } virtual void startFunction(MachineFunction &F); @@ -763,7 +758,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { } static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, - const TargetData *TD) { + const DataLayout *TD) { const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants(); if (Constants.empty()) return 0; @@ -780,7 +775,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, void JITEmitter::startFunction(MachineFunction &F) { DEBUG(dbgs() << "JIT: Starting CodeGen of Function " - << F.getFunction()->getName() << "\n"); + << F.getName() << "\n"); uintptr_t ActualSize = 0; // Set the memory writable, if it's not already @@ -929,7 +924,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { PrevDL = DebugLoc(); DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart << "] Function: " <<
F.getFunction()->getName() + << "] Function: " << F.getName() << ": " << (FnEnd-FnStart) << " bytes of text, " << Relocations.size() << " relocations\n"); @@ -1058,7 +1053,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants(); if (Constants.empty()) return; - unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData()); + unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getDataLayout()); unsigned Align = MCP->getConstantPoolAlignment(); ConstantPoolBase = allocateSpace(Size, Align); ConstantPool = MCP; @@ -1087,7 +1082,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { dbgs().write_hex(CAddr) << "]\n"); Type *Ty = CPE.Val.ConstVal->getType(); - Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty); + Offset += TheJIT->getDataLayout()->getTypeAllocSize(Ty); } } @@ -1104,14 +1099,14 @@ void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) { for (unsigned i = 0, e = JT.size(); i != e; ++i) NumEntries += JT[i].MBBs.size(); - unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getTargetData()); + unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getDataLayout()); // Just allocate space for all the jump tables now. We will fix up the actual // MBB entries in the tables after we emit the code for each block, since then // we will know the final locations of the MBBs in memory. JumpTable = MJTI; JumpTableBase = allocateSpace(NumEntries * EntrySize, - MJTI->getEntryAlignment(*TheJIT->getTargetData())); + MJTI->getEntryAlignment(*TheJIT->getDataLayout())); } void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { @@ -1128,7 +1123,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { case MachineJumpTableInfo::EK_BlockAddress: { // EK_BlockAddress - Each entry is a plain address of block, e.g.: // .word LBB123 - assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) && + assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == sizeof(void*) && "Cross JIT'ing?"); // For each jump table, map each target in the jump table to the address of @@ -1148,7 +1143,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { case MachineJumpTableInfo::EK_Custom32: case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: { - assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == 4&&"Cross JIT'ing?"); + assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == 4&&"Cross JIT'ing?"); // For each jump table, place the offset from the beginning of the table // to the target address. int *SlotPtr = (int*)JumpTableBase; @@ -1224,7 +1219,7 @@ uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables(); assert(Index < JT.size() && "Invalid jump table index!"); - unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getTargetData()); + unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getDataLayout()); unsigned Offset = 0; for (unsigned i = 0; i < Index; ++i) @@ -1265,15 +1260,13 @@ void *JIT::getPointerToFunctionOrStub(Function *F) { return Addr; // Get a stub if the target supports it. - assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - JITEmitter *JE = cast<JITEmitter>(getCodeEmitter()); + JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter()); return JE->getJITResolver().getLazyFunctionStub(F); } void JIT::updateFunctionStub(Function *F) { // Get the empty stub we generated earlier.
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - JITEmitter *JE = cast<JITEmitter>(getCodeEmitter()); + JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter()); void *Stub = JE->getJITResolver().getLazyFunctionStub(F); void *Addr = getPointerToGlobalIfAvailable(F); assert(Addr != Stub && "Function must have non-stub address to be updated."); @@ -1294,6 +1287,5 @@ void JIT::freeMachineCodeForFunction(Function *F) { updateGlobalMapping(F, 0); // Free the actual memory for the function body and related stuff. - assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - cast<JITEmitter>(JCE)->deallocateMemForFunction(F); + static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F); } diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index fef7176..2911a50 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,4 +1,3 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp - MCJITMemoryManager.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 739ffd7d8..752c5b7 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -8,18 +8,20 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" -#include "MCJITMemoryManager.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/ExecutionEngine/ObjectBuffer.h" +#include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MutexGuard.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; @@ -44,24 +46,20 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - // If the target supports JIT code generation, create the JIT. - if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), GVsWithCode); - - if (ErrorStr) - *ErrorStr = "target does not support JIT code generation"; - return 0; + return new MCJIT(M, TM, JMM, GVsWithCode); } -MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), - isCompiled(false), M(m), OS(Buffer) { +MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, + bool AllocateGVsWithCode) + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + isCompiled(false), M(m) { - setTargetData(TM->getTargetData()); + setDataLayout(TM->getDataLayout()); } MCJIT::~MCJIT() { + if (LoadedObject) + NotifyFreeingObject(*LoadedObject.get()); delete MemMgr; delete TM; } @@ -69,7 +67,7 @@ MCJIT::~MCJIT() { void MCJIT::emitObject(Module *m) { /// Currently, MCJIT only supports a single module and the module passed to /// this function call is expected to be the contained module. The module - /// is passed as a parameter here to prepare for multiple module support in + /// is passed as a parameter here to prepare for multiple module support in /// the future.
assert(M == m); @@ -84,41 +82,65 @@ void MCJIT::emitObject(Module *m) { PassManager PM; - PM.add(new TargetData(*TM->getTargetData())); + PM.add(new DataLayout(*TM->getDataLayout())); + + // The RuntimeDyld will take ownership of this shortly + OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream()); // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM->addPassesToEmitMC(PM, Ctx, OS, false)) { + if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) { report_fatal_error("Target does not support MC emission!"); } // Initialize passes. - // FIXME: When we support multiple modules, we'll want to move the code - // gen and finalization out of the constructor here and do it more - // on-demand as part of getPointerToFunction(). PM.run(*m); - // Flush the output buffer so the SmallVector gets its data. - OS.flush(); + // Flush the output buffer to get the generated code into memory + Buffer->flush(); // Load the object into the dynamic linker. - MemoryBuffer* MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), - Buffer.size()), - "", false); - if (Dyld.loadObject(MB)) + // handing off ownership of the buffer + LoadedObject.reset(Dyld.loadObject(Buffer.take())); + if (!LoadedObject) report_fatal_error(Dyld.getErrorString()); // Resolve any relocations. Dyld.resolveRelocations(); + // FIXME: Make this optional, maybe even move it to a JIT event listener + LoadedObject->registerWithDebugger(); + + NotifyObjectEmitted(*LoadedObject); + // FIXME: Add support for per-module compilation state isCompiled = true; } +// FIXME: Add a parameter to identify which object is being finalized when +// MCJIT supports multiple modules. +void MCJIT::finalizeObject() { + // If the module hasn't been compiled, just do that. + if (!isCompiled) { + // If the call to Dyld.resolveRelocations() is removed from emitObject() + // we'll need to do that here. + emitObject(M); + return; + } + + // Resolve any relocations. + Dyld.resolveRelocations(); +} + void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { report_fatal_error("not yet implemented"); } void *MCJIT::getPointerToFunction(Function *F) { + // FIXME: This should really return a uint64_t since it's a pointer in the + // target address space, not our local address space. That's part of the + // ExecutionEngine interface, though. Fix that when the old JIT finally + // dies. + // FIXME: Add support for per-module compilation state if (!isCompiled) emitObject(M); @@ -132,10 +154,13 @@ void *MCJIT::getPointerToFunction(Function *F) { // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. + // + // This is the accessor for the target address, so make sure to check the + // load address of the symbol, not the local address.
StringRef BaseName = F->getName(); if (BaseName[0] == '\1') - return (void*)Dyld.getSymbolAddress(BaseName.substr(1)); - return (void*)Dyld.getSymbolAddress((TM->getMCAsmInfo()->getGlobalPrefix() + return (void*)Dyld.getSymbolLoadAddress(BaseName.substr(1)); + return (void*)Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix() + BaseName).str()); } @@ -270,3 +295,33 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, } return 0; } + +void MCJIT::RegisterJITEventListener(JITEventListener *L) { + if (L == NULL) + return; + MutexGuard locked(lock); + EventListeners.push_back(L); +} +void MCJIT::UnregisterJITEventListener(JITEventListener *L) { + if (L == NULL) + return; + MutexGuard locked(lock); + SmallVector<JITEventListener*, 2>::reverse_iterator I= + std::find(EventListeners.rbegin(), EventListeners.rend(), L); + if (I != EventListeners.rend()) { + std::swap(*I, EventListeners.back()); + EventListeners.pop_back(); + } +} +void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { + MutexGuard locked(lock); + for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { + EventListeners[I]->NotifyObjectEmitted(Obj); + } +} +void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { + MutexGuard locked(lock); + for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { + EventListeners[I]->NotifyFreeingObject(Obj); + } +} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 1d272e9..571080d 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -11,33 +11,32 @@ #define LLVM_LIB_EXECUTIONENGINE_MCJIT_H #include "llvm/PassManager.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { +class ObjectImage; + // FIXME: This makes all kinds of horrible assumptions for the time being, // like only having one module, not needing to worry about multi-threading, // blah blah. Purely in get-it-up-and-limping mode for now. class MCJIT : public ExecutionEngine { - MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode); + MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr, + bool AllocateGVsWithCode); TargetMachine *TM; MCContext *Ctx; RTDyldMemoryManager *MemMgr; RuntimeDyld Dyld; + SmallVector<JITEventListener*, 2> EventListeners; // FIXME: Add support for multiple modules bool isCompiled; Module *M; - - // FIXME: Move these to a single container which manages JITed objects - SmallVector<char, 4096> Buffer; // Working buffer into which we JIT. - raw_svector_ostream OS; + OwningPtr<ObjectImage> LoadedObject; public: ~MCJIT(); @@ -45,6 +44,8 @@ public: /// @name ExecutionEngine interface implementation /// @{ + virtual void finalizeObject(); + virtual void *getPointerToBasicBlock(BasicBlock *BB); virtual void *getPointerToFunction(Function *F); @@ -71,10 +72,14 @@ public: /// Map the address of a JIT section as returned from the memory manager /// to the address in the target process as the running code will see it. /// This is the address which will be used for relocation resolution.
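For illustration, a minimal sketch of how a client might use this hook (not part of the patch): a process that emits code into a local buffer but intends it to run at a different target address records the mapping, then re-resolves relocations. EE, LocalBuf, and TargetAddr are hypothetical, and error handling is omitted.

// Illustrative sketch only -- not part of this patch. Assumes the
// post-patch ExecutionEngine interface (const void* local address and
// finalizeObject()).
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <stdint.h>

void remapSection(llvm::ExecutionEngine &EE, const void *LocalBuf,
                  uint64_t TargetAddr) {
  EE.mapSectionAddress(LocalBuf, TargetAddr); // record the target load address
  EE.finalizeObject();                        // re-run relocation resolution
}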
- virtual void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) { + virtual void mapSectionAddress(const void *LocalAddress, + uint64_t TargetAddress) { Dyld.mapSectionAddress(LocalAddress, TargetAddress); } + virtual void RegisterJITEventListener(JITEventListener *L); + virtual void UnregisterJITEventListener(JITEventListener *L); + /// @} /// @name (Private) Registration Interfaces /// @{ @@ -98,6 +103,9 @@ protected: /// is passed as a parameter here to prepare for multiple module support in /// the future. void emitObject(Module *M); + + void NotifyObjectEmitted(const ObjectImage& Obj); + void NotifyFreeingObject(const ObjectImage& Obj); }; } // End llvm namespace diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp deleted file mode 100644 index 457fe5e..0000000 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//==-- MCJITMemoryManager.cpp - Definition for the Memory Manager -*-C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCJITMemoryManager.h" - -using namespace llvm; - -void MCJITMemoryManager::anchor() { } diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h deleted file mode 100644 index 441aaeb..0000000 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ /dev/null @@ -1,50 +0,0 @@ -//===-- MCJITMemoryManager.h - Definition for the Memory Manager ---C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H -#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H - -#include "llvm/Module.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include - -namespace llvm { - -// The MCJIT memory manager is a layer between the standard JITMemoryManager -// and the RuntimeDyld interface that maps objects, by name, onto their -// matching LLVM IR counterparts in the module(s) being compiled. 
-class MCJITMemoryManager : public RTDyldMemoryManager { - virtual void anchor(); - OwningPtr<JITMemoryManager> JMM; - -public: - MCJITMemoryManager(JITMemoryManager *jmm) : - JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()) {} - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { - return JMM->allocateDataSection(Size, Alignment, SectionID); - } - - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { - return JMM->allocateCodeSection(Size, Alignment, SectionID); - } - - virtual void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true) { - return JMM->getPointerToNamedFunction(Name, AbortOnFailure); - } - -}; - -} // End llvm namespace - -#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp index 8b50101..50cd072 100644 --- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp @@ -78,12 +78,12 @@ public: /// Creates an entry in the JIT registry for the buffer @p Object, /// which must contain an object file in executable memory with any /// debug information for the debugger. - void registerObject(const MemoryBuffer &Object); + void registerObject(const ObjectBuffer &Object); /// Removes the internal registration of @p Object, and /// frees associated resources. /// Returns true if @p Object was found in ObjectBufferMap. - bool deregisterObject(const MemoryBuffer &Object); + bool deregisterObject(const ObjectBuffer &Object); private: /// Deregister the debug info for the given object file from the debugger @@ -124,7 +124,7 @@ GDBJITRegistrar::~GDBJITRegistrar() { ObjectBufferMap.clear(); } -void GDBJITRegistrar::registerObject(const MemoryBuffer &Object) { +void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) { const char *Buffer = Object.getBufferStart(); size_t Size = Object.getBufferSize(); @@ -147,7 +147,7 @@ void GDBJITRegistrar::registerObject(const MemoryBuffer &Object) { } } -bool GDBJITRegistrar::deregisterObject(const MemoryBuffer& Object) { +bool GDBJITRegistrar::deregisterObject(const ObjectBuffer& Object) { const char *Buffer = Object.getBufferStart(); RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Buffer); diff --git a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h index f964bc6..69e9dbe 100644 --- a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h +++ b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h @@ -10,7 +10,7 @@ #ifndef LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H #define LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ExecutionEngine/ObjectBuffer.h" namespace llvm { @@ -27,12 +27,12 @@ public: /// Creates an entry in the JIT registry for the buffer @p Object, /// which must contain an object file in executable memory with any /// debug information for the debugger. - virtual void registerObject(const MemoryBuffer &Object) = 0; + virtual void registerObject(const ObjectBuffer &Object) = 0; /// Removes the internal registration of @p Object, and /// frees associated resources. /// Returns true if @p Object was previously registered.
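A brief aside on the MemoryBuffer-to-ObjectBuffer migration in these hunks: under the new interface a raw object file is wrapped in an ObjectBuffer before registration. The sketch below is illustrative only and not part of the patch; the file name "jitted.o" and the helper registerFromDisk are hypothetical.

// Illustrative sketch only -- not part of this patch. Wraps a file read
// via MemoryBuffer in an ObjectBuffer and hands it to a JITRegistrar.
#include "JITRegistrar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/Support/MemoryBuffer.h"

bool registerFromDisk(llvm::JITRegistrar &R) {
  llvm::OwningPtr<llvm::MemoryBuffer> MB;
  if (llvm::MemoryBuffer::getFile("jitted.o", MB))
    return false;                    // non-zero error_code means failure
  llvm::ObjectBuffer OB(MB.take());  // ObjectBuffer takes ownership
  R.registerObject(OB);
  return true;
}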
- virtual bool deregisterObject(const MemoryBuffer &Object) = 0; + virtual bool deregisterObject(const ObjectBuffer &Object) = 0; /// Returns a reference to a GDB JIT registrar singleton static JITRegistrar& getGDBRegistrar(); diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h deleted file mode 100644 index c3e3572..0000000 --- a/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h +++ /dev/null @@ -1,59 +0,0 @@ -//===---- ObjectImage.h - Format independent executuable object image -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares a file format independent ObjectImage class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H -#define LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H - -#include "llvm/Object/ObjectFile.h" - -namespace llvm { - -class ObjectImage { - ObjectImage(); // = delete - ObjectImage(const ObjectImage &other); // = delete -protected: - object::ObjectFile *ObjFile; - -public: - ObjectImage(object::ObjectFile *Obj) { ObjFile = Obj; } - virtual ~ObjectImage() {} - - virtual object::symbol_iterator begin_symbols() const - { return ObjFile->begin_symbols(); } - virtual object::symbol_iterator end_symbols() const - { return ObjFile->end_symbols(); } - - virtual object::section_iterator begin_sections() const - { return ObjFile->begin_sections(); } - virtual object::section_iterator end_sections() const - { return ObjFile->end_sections(); } - - virtual /* Triple::ArchType */ unsigned getArch() const - { return ObjFile->getArch(); } - - // Subclasses can override these methods to update the image with loaded - // addresses for sections and common symbols - virtual void updateSectionAddress(const object::SectionRef &Sec, - uint64_t Addr) {} - virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr) - {} - - // Subclasses can override these methods to provide JIT debugging support - virtual void registerWithDebugger() {} - virtual void deregisterWithDebugger() {} -}; - -} // end namespace llvm - -#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H - diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h new file mode 100644 index 0000000..17f3a21 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -0,0 +1,76 @@ +//===-- ObjectImageCommon.h - Format independent executable object image --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+
+namespace llvm {
+
+class ObjectImageCommon : public ObjectImage {
+  ObjectImageCommon(); // = delete
+  ObjectImageCommon(const ObjectImageCommon &other); // = delete
+
+protected:
+  object::ObjectFile *ObjFile;
+
+  // This form of the constructor allows subclasses to use
+  // format-specific subclasses of ObjectFile directly
+  ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
+  : ObjectImage(Input), // saves Input as Buffer and takes ownership
+    ObjFile(Obj)
+  {
+  }
+
+public:
+  ObjectImageCommon(ObjectBuffer* Input)
+  : ObjectImage(Input) // saves Input as Buffer and takes ownership
+  {
+    ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer());
+  }
+  virtual ~ObjectImageCommon() { delete ObjFile; }
+
+  virtual object::symbol_iterator begin_symbols() const
+              { return ObjFile->begin_symbols(); }
+  virtual object::symbol_iterator end_symbols() const
+              { return ObjFile->end_symbols(); }
+
+  virtual object::section_iterator begin_sections() const
+              { return ObjFile->begin_sections(); }
+  virtual object::section_iterator end_sections() const
+              { return ObjFile->end_sections(); }
+
+  virtual /* Triple::ArchType */ unsigned getArch() const
+              { return ObjFile->getArch(); }
+
+  virtual StringRef getData() const { return ObjFile->getData(); }
+
+  // Subclasses can override these methods to update the image with loaded
+  // addresses for sections and common symbols
+  virtual void updateSectionAddress(const object::SectionRef &Sec,
+                                    uint64_t Addr) {}
+  virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
+              {}
+
+  // Subclasses can override these methods to provide JIT debugging support
+  virtual void registerWithDebugger() {}
+  virtual void deregisterWithDebugger() {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index b464040..f6dccb1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -12,10 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dyld"
+#include "ObjectImageCommon.h"
 #include "RuntimeDyldImpl.h"
 #include "RuntimeDyldELF.h"
 #include "RuntimeDyldMachO.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace llvm;
 using namespace llvm::object;
@@ -26,16 +28,6 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {}
 
 namespace llvm {
 
-namespace {
-  // Helper for extensive error checking in debug builds.
-  error_code Check(error_code Err) {
-    if (Err) {
-      report_fatal_error(Err.message());
-    }
-    return Err;
-  }
-} // end anonymous namespace
-
 // Resolve the relocations for all symbols we currently know about.
 void RuntimeDyldImpl::resolveRelocations() {
   // First, resolve relocations associated with external symbols.
@@ -44,11 +36,15 @@ void RuntimeDyldImpl::resolveRelocations() {
   // Just iterate over the sections we have and resolve all the relocations
   // in them. Gross overkill, but it gets the job done.
   for (int i = 0, e = Sections.size(); i != e; ++i) {
-    reassignSectionAddress(i, Sections[i].LoadAddress);
+    uint64_t Addr = Sections[i].LoadAddress;
+    DEBUG(dbgs() << "Resolving relocations Section #" << i
+            << "\t" << format("%p", (uint8_t *)Addr)
+            << "\n");
+    resolveRelocationList(Relocations[i], Addr);
   }
 }
 
-void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
+void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
                                         uint64_t TargetAddress) {
   for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
     if (Sections[i].Address == LocalAddress) {
@@ -61,14 +57,11 @@ void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
 
 // Subclasses can implement this method to create specialized image instances.
 // The caller owns the pointer that is returned.
-ObjectImage *RuntimeDyldImpl::createObjectImage(const MemoryBuffer *InputBuffer) {
-  ObjectFile *ObjFile = ObjectFile::createObjectFile(const_cast<MemoryBuffer*>
-                                                     (InputBuffer));
-  ObjectImage *Obj = new ObjectImage(ObjFile);
-  return Obj;
+ObjectImage *RuntimeDyldImpl::createObjectImage(ObjectBuffer *InputBuffer) {
+  return new ObjectImageCommon(InputBuffer);
 }
 
-bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
+ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
   OwningPtr<ObjectImage> obj(createObjectImage(InputBuffer));
   if (!obj)
     report_fatal_error("Unable to create object image from memory buffer!");
@@ -80,9 +73,9 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
   // Used sections from the object file
   ObjSectionToIDMap LocalSections;
 
-  // Common symbols requiring allocation, and the total size required to
-  // allocate all common symbols.
+  // Common symbols requiring allocation, with their sizes and alignments
   CommonSymbolMap CommonSymbols;
+  // Maximum required total memory to allocate all common symbols
   uint64_t CommonSize = 0;
 
   error_code err;
@@ -102,13 +95,15 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
     bool isCommon = flags & SymbolRef::SF_Common;
     if (isCommon) {
       // Add the common symbols to a list.  We'll allocate them all below.
+      uint64_t Align = getCommonSymbolAlignment(*i);
       uint64_t Size = 0;
       Check(i->getSize(Size));
-      CommonSize += Size;
-      CommonSymbols[*i] = Size;
+      CommonSize += Size + Align;
+      CommonSymbols[*i] = CommonSymbolInfo(Size, Align);
     } else {
       if (SymType == object::SymbolRef::ST_Function ||
-          SymType == object::SymbolRef::ST_Data) {
+          SymType == object::SymbolRef::ST_Data ||
+          SymType == object::SymbolRef::ST_Unknown) {
         uint64_t FileOffset;
         StringRef SectionData;
         section_iterator si = obj->end_sections();
@@ -177,9 +172,7 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
     }
   }
 
-  handleObjectLoaded(obj.take());
-
-  return false;
+  return obj.take();
 }
 
 void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
@@ -193,7 +186,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
   if (!Addr)
     report_fatal_error("Unable to allocate memory for common symbols!");
   uint64_t Offset = 0;
-  Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0));
+  Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0));
   memset(Addr, 0, TotalSize);
 
   DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
@@ -204,11 +197,20 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
   // Assign the address of each symbol
   for (CommonSymbolMap::const_iterator it = CommonSymbols.begin(),
        itEnd = CommonSymbols.end(); it != itEnd; it++) {
+    uint64_t Size = it->second.first;
+    uint64_t Align = it->second.second;
     StringRef Name;
     it->first.getName(Name);
+    if (Align) {
+      // This symbol has an alignment requirement.
+      uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align);
+      Addr += AlignOffset;
+      Offset += AlignOffset;
+      DEBUG(dbgs() << "Allocating common symbol " << Name << " address " <<
+                      format("%p\n", Addr));
+    }
     Obj.updateSymbolAddress(it->first, (uint64_t)Addr);
     SymbolTable[Name.data()] = SymbolLoc(SectionID, Offset);
-    uint64_t Size = it->second;
     Offset += Size;
     Addr += Size;
   }
@@ -236,10 +238,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
   bool IsVirtual;
   bool IsZeroInit;
   uint64_t DataSize;
+  StringRef Name;
   Check(Section.isRequiredForExecution(IsRequired));
   Check(Section.isVirtual(IsVirtual));
   Check(Section.isZeroInit(IsZeroInit));
   Check(Section.getSize(DataSize));
+  Check(Section.getName(Name));
 
   unsigned Allocate;
   unsigned SectionID = Sections.size();
@@ -267,6 +271,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
       memcpy(Addr, pData, DataSize);
 
     DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+                 << " Name: " << Name
                  << " obj addr: " << format("%p", pData)
                  << " new addr: " << format("%p", Addr)
                  << " DataSize: " << DataSize
@@ -282,6 +287,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
     Allocate = 0;
     Addr = 0;
     DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+                 << " Name: " << Name
                  << " obj addr: " << format("%p", data.data())
                  << " new addr: 0"
                  << " DataSize: " << DataSize
@@ -290,7 +296,8 @@
                  << "\n");
   }
 
-  Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData));
+  Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize,
+                                  (uintptr_t)pData));
 
   return SectionID;
 }
@@ -333,15 +340,49 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
 }
 
 uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
-  // TODO: There is only ARM far stub now. We should add the Thumb stub,
-  // and stubs for branches Thumb - ARM and ARM - Thumb.
   if (Arch == Triple::arm) {
+    // TODO: There is only ARM far stub now. We should add the Thumb stub,
+    // and stubs for branches Thumb - ARM and ARM - Thumb.
     uint32_t *StubAddr = (uint32_t*)Addr;
     *StubAddr = 0xe51ff004; // ldr pc,<label>
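
The constant 0xe51ff004 written by createStubFunction above encodes the ARM
instruction "ldr pc, [pc, #-4]". Because the ARM program counter reads as the
address of the current instruction plus 8, the load address resolves to the
word immediately following the instruction, so a far stub is just this
instruction followed by a 32-bit literal holding the branch target; RuntimeDyld
leaves that literal to be filled in later by its relocation machinery. The
sketch below illustrates the layout in isolation. It is not LLVM code:
makeFarStub and its parameters are hypothetical names, and it assumes a
little-endian 32-bit ARM target and stub memory that is already writable and
executable.

#include <cstdint>

// Minimal far-stub sketch (hypothetical helper, not part of RuntimeDyld).
// Layout:  Words[0]  ldr pc, [pc, #-4]  -- jumps via the literal that follows
//          Words[1]  32-bit target address, written directly here; RuntimeDyld
//                    instead patches this word when relocations are resolved.
static uint8_t *makeFarStub(uint8_t *StubMem, uint32_t TargetAddr) {
  uint32_t *Words = reinterpret_cast<uint32_t *>(StubMem);
  Words[0] = 0xe51ff004;  // ldr pc, [pc, #-4]
  Words[1] = TargetAddr;  // literal loaded into pc
  return StubMem;         // branching here lands at TargetAddr
}

A branch routed through such a stub can reach any 32-bit address, whereas a
direct ARM B/BL instruction is limited to roughly +/-32 MB of displacement,
which is why out-of-range calls are redirected through stubs. On real hardware
the stub memory must also have its instruction cache invalidated before it is
executed.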