home: hub: zuo

Download patch

ref: 0e91e3f7753eb9a0f3f7fe758c6d94690ef86000
parent: 60eafd49a8e2a67d82ad1e9f5d0618003c6c4cec
author: Matthew Flatt <mflatt@racket-lang.org>
date: Wed Nov 30 09:58:25 CST 2022

detect cycles across concurrent explorations

Saw a talk by @snowleopard about cycle detection in Dune and learned
that Zuo must be missing something there. Zuo would just deadlock when
a cycle is not confined to a build thread. Instead of an eager
incremental cycle detection, the change here performs a plain old
graph cycle check at the point that it would otherwise deadlock (which
delays discovery of a cycle until everything else is finished).
There's a small added overhead for tracking paused-thread dependencies
and anticipating deadlock, though.

--- a/lib/zuo/build.zuo
+++ b/lib/zuo/build.zuo
@@ -213,6 +213,8 @@
                      db            ; key -> (cons sha256 dep-sha256s) | #t [for db file itself]
                      time-cache    ; key -> (cons timestamp sha256)
                      saw-targets   ; key -> target [to detect multiple for same output]
+                     thread-count  ; channel to hold thread-scheduler state
+                     thread-seens  ; sym -> hash [to detect cycles when count goes to 0]
                      resourcer     ; build stat's resource manager
                      log?          ; logging enabled?
                      uni?))        ; just one job?
@@ -240,13 +242,14 @@
                                    ;; assume jobserver allows more than 1:
                                    2)
                               (default-jobs)))
-         (define ch (channel))
-         (define state (build-state ch
+         (define state (build-state (channel)
                                     (hash)
                                     (hash)
                                     (hash)
                                     (hash)
                                     (hash)
+                                    1
+                                    (hash)
                                     (or resourcer
                                         jobserver-resourcer
                                         (make-resourcer num-jobs))
@@ -300,21 +303,30 @@
      (let ([state (ensure-consistent state t)])
        (define current (target-state state t))
        (cond
-         [(not current) (force-build t (build-input-or-unbuilt t state seen top?))]
-         [(channel? current) (force-build t state)]
+         [(not current) (force-build t (build-input-or-unbuilt t state seen top?) seen)]
+         [(channel? current) (force-build t state seen)]
          [else state]))]))
 
 ;; Blocks until an in-progress target completes
-(define (force-build t state)
+(define (force-build t state seen)
   (define current (target-state state t))
   (cond
     [(channel? current)
-     ;; Waiting on the channel might block, so relinquish state
-     (put-state (build-state-ch state) state "force")
-     (channel-get current)
-     ;; Put value back to potentially satisfy some other waiting thread:
-     (channel-put current 'still-done)
-     (get-state (build-state-ch state) "force")]
+     (define (ready-again)
+       ;; Put value back to potentially satisfy some other waiting thread:
+       (channel-put current 'still-done))
+     (cond
+       [(channel-try-get current)
+        (ready-again)
+        state]
+       [else
+        ;; Waiting on the channel might block, so relinquish state
+        (define th-key (string->uninterned-symbol "thread"))
+        (define new-state (check/add-blocking-dependency state seen t th-key))
+        (put-state (build-state-ch new-state) new-state "force")
+        (channel-get current)
+        (ready-again)
+        (remove-blocking-dependency (get-state (build-state-ch new-state) "force") th-key)])]
     [else state]))
 
 ;; Shortcut for plain inputs, otherwise starts a build
@@ -394,12 +406,13 @@
   (define rule-state (get-state (token-ch tok) "rule"))
 
   ;; trigger builds of dependencies, but don't want for them to complete
-  (for-each (make-fetch-dep rule-state new-seen dep-top?) deps)
+  (define fetch-state
+    (foldl (make-fetch-dep rule-state new-seen dep-top?) rule-state deps))
 
   ;; now that they're all potentially started, wait for completions
   (define new-state
     (foldl (lambda (dep state) (do-build dep state new-seen dep-top?))
-           rule-state
+           fetch-state
            deps))
 
   ;; extract results, assemble in a hash table: <rel-path> -> <sha256>
@@ -439,10 +452,11 @@
                '()
                (hash-keys prev-dep-sha256s))
         '()))
-  (for-each (make-fetch-dep new-state new-seen dep-top?) more-deps)
+  (define fetch-again-state
+    (foldl (make-fetch-dep new-state new-seen dep-top?) new-state more-deps))
   (define newer-state
     (foldl (lambda (dep state) (do-build dep state new-seen dep-top?))
-           new-state
+           fetch-again-state
            more-deps))
   (define all-dep-sha256s
     (foldl (lambda (dep dep-sha256s)
@@ -503,29 +517,31 @@
           (build-one (lambda (proc) (proc (get-state state-ch "build"))))]
          [else
           ;; run build in its own thread
-          (thread (lambda ()
-                    (acquire-resource newer-state path) ; limits process parallelism
-                    (build-one
-                     (lambda (proc)
-                       (release-resource newer-state path)
-                       (let* ([state (get-state state-ch "tbuild")]
-                              [state (proc state)])
-                         (put-state state-ch state "tbuild"))))))
-          newer-state]))]))
+          (thread* newer-state
+                   (lambda ()
+                     (acquire-resource newer-state path) ; limits process parallelism
+                     (build-one
+                      (lambda (proc)
+                        (release-resource newer-state path)
+                        (let* ([state (get-state state-ch "tbuild")]
+                               [state (proc state)])
+                          (put-state state-ch (thread-done state) "tbuild"))))))]))]))
 
 (define (make-fetch-dep state seen top?)
   (if (build-state-uni? state)
-      void
+      (lambda (dep state)
+        state)
       (let ([state-ch (build-state-ch state)])
-        (lambda (dep)
+        (lambda (dep state)
           ;; No one waits for the this thread's work, and it doesn't
           ;; actually use the build result. It just creates a demand for
           ;; the build result, so the demand exists concurrent to waiting
           ;; on dependencies.
-          (thread (lambda ()
-                    (let* ([state (get-state state-ch "fetch")]
-                           [state (do-build dep state seen top?)])
-                      (put-state state-ch state "fetch"))))))))
+          (thread* state
+                   (lambda ()
+                     (let* ([state (get-state state-ch "fetch")]
+                            [state (do-build dep state seen top?)])
+                       (put-state state-ch (thread-done state) "fetch"))))))))
 
 ;; Alternative entry point suitable for use from a script's `main`
 (define (build/command-line targets [opts (hash)])
@@ -787,6 +803,66 @@
         key)))
 
 ;; see also "private/build-db.zuo"
+
+;; ------------------------------------------------------------
+;; Cross-thread dependency-cycle checking
+
+;; Within a single thread, a `seen` table detects cycles, but that's
+;; not good enough for cycles across threads. To detect those cycles,
+;; record the `seen` chain plus `t` for a thread while it is blocking
+;; on `t`. If we run out of threads (i.e., deadlock), then we can
+;; construct a graph based on `seen` -> `t` dependendies and look for
+;; a cycle in the graph.
+
+(define (thread* state thunk)
+  (thread thunk)
+  (build-state-set-thread-count state (+ (build-state-thread-count state) 1)))
+
+(define (thread-done state)
+  (build-state-set-thread-count state (- (build-state-thread-count state) 1)))
+
+(define (check/add-blocking-dependency state seen t th-key)
+  (define new-seens (hash-set (build-state-thread-seens state) th-key (cons seen t)))
+  (define new-state (build-state-set-thread-seens state new-seens))
+  (define count (build-state-thread-count state))
+  (when (= count 1)
+    ;; potential deadlock, so look for cycle
+    (define edges
+      (foldl (lambda (k edges)
+               (define seen+t (hash-ref new-seens k))
+               (define t (cdr seen+t))
+               (foldl (lambda (k edges)
+                        (hash-set edges k (cons t (hash-ref edges k '()))))
+                      edges
+                      (hash-keys (car seen+t))))
+             (hash)
+             (hash-keys new-seens)))
+    (foldl (lambda (k done)
+             (foldl (lambda (t done)
+                      (let loop ([t t] [seen (hash)])
+                        (define t-k (target-key t))
+                        (cond
+                          [(hash-ref done t-k #f) done]
+                          [else
+                           (when (hash-ref seen t-k #f)
+                             (error "build: dependency cycle" (target-name t)))
+                           (define new-seen (hash-set seen (target-key t) #t))
+                           (define new-done
+                             (foldl (lambda (t done)
+                                      (loop t new-seen))
+                                    done
+                                    (hash-ref edges (target-key t) '())))
+                           ;; didn't detect cycle, so this token is ok
+                           (hash-set new-done t-k #t)])))
+                    done
+                    (hash-ref edges k)))
+           (hash)
+           (hash-keys edges)))
+  (build-state-set-thread-count new-state (- count 1)))
+
+(define (remove-blocking-dependency state th-key)
+  (build-state-set-thread-seens (build-state-set-thread-count state (+ (build-state-thread-count state) 1))
+                                (hash-remove (build-state-thread-seens state) th-key)))
 
 ;; ------------------------------------------------------------
 ;; Generic helpers
--- a/lib/zuo/thread.zuo
+++ b/lib/zuo/thread.zuo
@@ -8,6 +8,7 @@
          channel?
          channel-put
          channel-get
+         channel-try-get
          thread-process-wait)
 
 (struct thread (id))
@@ -155,7 +156,7 @@
                [else
                 (loop (ch-set-w-tl (ch-set-w-hd ch (reverse w-tl)) '()))]))])))))
 
-(define (raw-channel-get st chl k yield-k)
+(define (raw-channel-get st chl k yield-k [just-try? #f])
   (let* ([chs (state-channels st)]
          [ch (hash-ref chs (channel-id chl) #f)])
     (unless ch (error "channel-get: does not belong to the running thread group" ch))
@@ -170,13 +171,17 @@
            (let* ([tl (ch-tl ch)])
              (cond
                [(null? tl)
-                (call/cc
-                 (lambda (k)
-                   (yield-k (update-ch st (ch-set-w-tl ch (cons k (ch-w-tl ch)))))))]
+                (cond
+                  [just-try?
+                   (yield-k st)]
+                  [else
+                   (call/cc
+                    (lambda (k)
+                      (yield-k (update-ch st (ch-set-w-tl ch (cons k (ch-w-tl ch)))))))])]
                [else
                 (loop (ch-set-tl (ch-set-hd ch (reverse tl)) '()))]))])))))
 
-(define (channel-get chl)
+(define (do-channel-get who chl unavailable just-try?)
   (unless (channel? chl) (arg-error 'channel-get "channel" chl))
   (check-in-thread 'channel-get)
   (raw-channel-get (current-state) chl
@@ -185,7 +190,14 @@
                      v)
                    (lambda (st)
                      (current-state st)
-                     (yield))))
+                     (unavailable))
+                   just-try?))
+
+(define (channel-get chl)
+  (do-channel-get 'channel-get chl yield #f))
+
+(define (channel-try-get chl)
+  (do-channel-get 'channel-try-get chl (lambda () #f) #t))
 
 (define (thread-process-wait p . ps)
   (for-each (lambda (p)
--- /dev/null
+++ b/tests/build-cycle.zuo
@@ -1,0 +1,43 @@
+#lang zuo
+(require "harness.zuo")
+
+;; This script is run by the "build.zuo" test
+
+(define (pause)
+  (thread-process-wait
+   (hash-ref
+    (process (hash-ref (runtime-env) 'exe)
+             "-c"
+             "#lang zuo/kernel (hash)")
+    'process)))
+
+(define (touch fn)
+  (error "shouldn't get to touch")
+  (fd-close (fd-open-output fn :truncate)))
+
+(define (tmp fn)
+  (build-path tmp-dir fn))
+
+(define x (target (tmp "cycle-x")
+                  (lambda (path token)
+                    (pause)
+                    (rule (list z)
+                          (lambda ()
+                            (touch path))))))
+
+(define y (target (tmp "cycle-y")
+                  (lambda (path token)
+                    (pause)
+                    (rule (list x)
+                          (lambda ()
+                            (touch path))))))
+
+(define z (target (tmp "cycle-z")
+                  (lambda (path token)
+                    (pause)
+                    (rule (list y)
+                          (lambda ()
+                            (touch path))))))
+
+;; should fail with cycle error:
+(build (list x y z) #f (hash 'jobs 3))
--- /dev/null
+++ b/tests/build.zuo
@@ -1,0 +1,19 @@
+#lang zuo
+
+(require "harness.zuo")
+
+(alert "build")
+
+(let ()
+  (define p (process (hash-ref (runtime-env) 'exe #f)
+                     (at-source "build-cycle.zuo")
+                     (hash 'stdin 'pipe 'stdout 'pipe 'stderr 'pipe)))
+  (fd-close (hash-ref p 'stdin))
+  (define out (fd-read (hash-ref p 'stdout) eof))
+  (define err (fd-read (hash-ref p 'stderr) eof))
+  (fd-close (hash-ref p 'stdout))
+  (fd-close (hash-ref p 'stderr))
+  (process-wait (hash-ref p 'process))
+
+  (check (glob-match? "*dependency cycle*" err) #t)
+  (check "" out))
--- a/tests/main.zuo
+++ b/tests/main.zuo
@@ -24,6 +24,7 @@
 (require "config.zuo")
 (require "c.zuo")
 (require "cycle.zuo")
+(require "build.zuo")
 (require "form.zuo")
 (require "form-hygienic.zuo")
 (require "macro.zuo")
--- a/tests/thread.zuo
+++ b/tests/thread.zuo
@@ -156,3 +156,9 @@
                (lambda ()
                  ((call/prompt (lambda () (call/cc (lambda (k) k)))) 0))))
             "main thread is stuck")
+
+(check (channel-try-get (channel)) #f)
+(check (let ([ch (channel)])
+         (channel-put ch "x")
+         (channel-try-get ch))
+       "x")
--- a/zuo-doc/fake-zuo.rkt
+++ b/zuo-doc/fake-zuo.rkt
@@ -234,7 +234,7 @@
     build-shell
 
     call-in-main-thread
-    thread? thread channel? channel channel-put channel-get
+    thread? thread channel? channel channel-put channel-get channel-try-get
     thread-process-wait
     config-file->hash
 
--- a/zuo-doc/zuo-lib.scrbl
+++ b/zuo-doc/zuo-lib.scrbl
@@ -139,10 +139,11 @@
 @defproc[(channel? [v any/c]) boolean?]
 @defproc[(channel-put [ch channel?] [v any/c]) channel?]
 @defproc[(channel-get [ch channel?]) any/c]
+@defproc[(channel-try-get [ch channel?]) any/c]
 )]{
 
 Analogous to @realracket*[thread thread? make-channel channel? channel-put
-channel-get] from @racketmodname[racket], but channels are
+channel-get channel-try-get] from @racketmodname[racket], but channels are
 asynchronous (with an unbounded queue) instead of synchronous.
 
 Except for @racket[thread?] and @racket[channel?], these procedures
@@ -152,7 +153,9 @@
 Beware that attempting to use these operations outside of a threading
 context will @emph{not} necessarily trigger an error, and may instead
 deliver an opaque threading request to the enclosing continuation
-prompt.}
+prompt.
+
+@history[#:changed "1.4" @elem{Added @racket[channel-try-get].}]}
 
 @defproc[(thread-process-wait [process handle?] ...) handle?]{
 
--- a/zuo.c
+++ b/zuo.c
@@ -3,7 +3,7 @@
    declarations. */
 
 #define ZUO_VERSION 1
-#define ZUO_MINOR_VERSION 3
+#define ZUO_MINOR_VERSION 4
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
 # define ZUO_WINDOWS