1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
use crate::core::compiler::{CompileKind, CompileMode};
use crate::core::{profiles::Profile, Package, Target};
use crate::util::hex::short_hash;
use std::cell::RefCell;
use std::collections::HashSet;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;

/// All information needed to define a unit.
///
/// A unit is an object that has enough information so that cargo knows how to build it.
/// For example, if your package has dependencies, then every dependency will be built as a library
/// unit. If your package is a library, then it will be built as a library unit as well, or if it
/// is a binary with `main.rs`, then a binary will be output. There are also separate unit types
/// for `test`ing and `check`ing, amongst others.
///
/// The unit also holds information about all possible metadata about the package in `pkg`.
///
/// A unit needs to know extra information in addition to the type and root source file. For
/// example, it needs to know the target architecture (OS, chip arch etc.) and it needs to know
/// whether you want a debug or release build. There is enough information in this struct to figure
/// all that out.
#[derive(Clone, Copy, PartialOrd, Ord)]
pub struct Unit<'a> {
    inner: &'a UnitInner<'a>,
}

/// Internal fields of `Unit` which `Unit` will dereference to.
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct UnitInner<'a> {
    /// Information about available targets, which files to include/exclude, etc. Basically stuff in
    /// `Cargo.toml`.
    pub pkg: &'a Package,
    /// Information about the specific target to build, out of the possible targets in `pkg`. Not
    /// to be confused with *target-triple* (or *target architecture* ...), the target arch for a
    /// build.
    pub target: &'a Target,
    /// The profile contains information about *how* the build should be run, including debug
    /// level, etc.
    pub profile: Profile,
    /// Whether this compilation unit is for the host or target architecture.
    ///
    /// For example, when
    /// cross compiling and using a custom build script, the build script needs to be compiled for
    /// the host architecture so the host rustc can use it (when compiling to the target
    /// architecture).
    pub kind: CompileKind,
    /// The "mode" this unit is being compiled for. See [`CompileMode`] for more details.
    pub mode: CompileMode,
    /// The `cfg` features to enable for this unit.
    /// This must be sorted.
    pub features: Vec<&'a str>,
    /// Whether this is a standard library unit.
    pub is_std: bool,
}

impl UnitInner<'_> {
    /// Returns whether compilation of this unit requires all upstream artifacts
    /// to be available.
    ///
    /// This effectively means that this unit is a synchronization point (if the
    /// return value is `true`) that all previously pipelined units need to
    /// finish in their entirety before this one is started.
    pub fn requires_upstream_objects(&self) -> bool {
        self.mode.is_any_test() || self.target.kind().requires_upstream_objects()
    }
}

impl<'a> Unit<'a> {
    pub fn buildkey(&self) -> String {
        format!("{}-{}", self.pkg.name(), short_hash(self))
    }
}

// Just hash the pointer for fast hashing
impl<'a> Hash for Unit<'a> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        (self.inner as *const UnitInner<'a>).hash(hasher)
    }
}

// Just equate the pointer since these are interned
impl<'a> PartialEq for Unit<'a> {
    fn eq(&self, other: &Unit<'a>) -> bool {
        self.inner as *const UnitInner<'a> == other.inner as *const UnitInner<'a>
    }
}

impl<'a> Eq for Unit<'a> {}

impl<'a> Deref for Unit<'a> {
    type Target = UnitInner<'a>;

    fn deref(&self) -> &UnitInner<'a> {
        self.inner
    }
}

impl<'a> fmt::Debug for Unit<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Unit")
            .field("pkg", &self.pkg)
            .field("target", &self.target)
            .field("profile", &self.profile)
            .field("kind", &self.kind)
            .field("mode", &self.mode)
            .field("features", &self.features)
            .finish()
    }
}

/// A small structure used to "intern" `Unit` values.
///
/// A `Unit` is just a thin pointer to an internal `UnitInner`. This is done to
/// ensure that `Unit` itself is quite small as well as enabling a very
/// efficient hash/equality implementation for `Unit`. All units are
/// manufactured through an interner which guarantees that each equivalent value
/// is only produced once.
pub struct UnitInterner<'a> {
    state: RefCell<InternerState<'a>>,
}

struct InternerState<'a> {
    cache: HashSet<Box<UnitInner<'a>>>,
}

impl<'a> UnitInterner<'a> {
    /// Creates a new blank interner
    pub fn new() -> UnitInterner<'a> {
        UnitInterner {
            state: RefCell::new(InternerState {
                cache: HashSet::new(),
            }),
        }
    }

    /// Creates a new `unit` from its components. The returned `Unit`'s fields
    /// will all be equivalent to the provided arguments, although they may not
    /// be the exact same instance.
    pub fn intern(
        &'a self,
        pkg: &'a Package,
        target: &'a Target,
        profile: Profile,
        kind: CompileKind,
        mode: CompileMode,
        features: Vec<&'a str>,
        is_std: bool,
    ) -> Unit<'a> {
        let inner = self.intern_inner(&UnitInner {
            pkg,
            target,
            profile,
            kind,
            mode,
            features,
            is_std,
        });
        Unit { inner }
    }

    // Ok so interning here is a little unsafe, hence the usage of `unsafe`
    // internally. The primary issue here is that we've got an internal cache of
    // `UnitInner` instances added so far, but we may need to mutate it to add
    // it, and the mutation for an interner happens behind a shared borrow.
    //
    // Our goal though is to escape the lifetime `borrow_mut` to the same
    // lifetime as the borrowed passed into this function. That's where `unsafe`
    // comes into play. What we're subverting here is resizing internally in the
    // `HashSet` as well as overwriting previous keys in the `HashSet`.
    //
    // As a result we store `Box<UnitInner>` internally to have an extra layer
    // of indirection. That way `*const UnitInner` is a stable address that
    // doesn't change with `HashSet` resizing. Furthermore we're careful to
    // never overwrite an entry once inserted.
    //
    // Ideally we'd use an off-the-shelf interner from crates.io which avoids a
    // small amount of unsafety here, but at the time this was written one
    // wasn't obviously available.
    fn intern_inner(&'a self, item: &UnitInner<'a>) -> &'a UnitInner<'a> {
        let mut me = self.state.borrow_mut();
        if let Some(item) = me.cache.get(item) {
            // note that `item` has type `&Box<UnitInner<'a>`. Use `&**` to
            // convert that to `&UnitInner<'a>`, then do some trickery to extend
            // the lifetime to the `'a` on the function here.
            return unsafe { &*(&**item as *const UnitInner<'a>) };
        }
        me.cache.insert(Box::new(item.clone()));
        let item = me.cache.get(item).unwrap();
        unsafe { &*(&**item as *const UnitInner<'a>) }
    }
}