ref: 0e91e3f7753eb9a0f3f7fe758c6d94690ef86000
parent: 60eafd49a8e2a67d82ad1e9f5d0618003c6c4cec
author: Matthew Flatt <mflatt@racket-lang.org>
date: Wed Nov 30 09:58:25 CST 2022
detect cycles across concurrent explorations Saw a talk by @snowleopard about cycle detection in Dune and learned that Zuo must be missing something there. Zuo would just deadlock when a cycle is not confined to a build thread. Instead of an eager incremental cycle detection, the change here performs a plain old graph cycle check at the point that it would otherwise deadlock (which delays discovery of a cycle until everything else is finished). There's a small added overhead for tracking paused-thread dependencies and anticipating deadlock, though.
--- a/lib/zuo/build.zuo
+++ b/lib/zuo/build.zuo
@@ -213,6 +213,8 @@
db ; key -> (cons sha256 dep-sha256s) | #t [for db file itself]
time-cache ; key -> (cons timestamp sha256)
saw-targets ; key -> target [to detect multiple for same output]
+ thread-count ; channel to hold thread-scheduler state
+ thread-seens ; sym -> hash [to detect cycles when count goes to 0]
resourcer ; build stat's resource manager
log? ; logging enabled?
uni?)) ; just one job?
@@ -240,13 +242,14 @@
;; assume jobserver allows more than 1:
2)
(default-jobs)))
- (define ch (channel))
- (define state (build-state ch
+ (define state (build-state (channel)
(hash)
(hash)
(hash)
(hash)
(hash)
+ 1
+ (hash)
(or resourcer
jobserver-resourcer
(make-resourcer num-jobs))
@@ -300,21 +303,30 @@
(let ([state (ensure-consistent state t)])
(define current (target-state state t))
(cond
- [(not current) (force-build t (build-input-or-unbuilt t state seen top?))]
- [(channel? current) (force-build t state)]
+ [(not current) (force-build t (build-input-or-unbuilt t state seen top?) seen)]
+ [(channel? current) (force-build t state seen)]
[else state]))]))
;; Blocks until an in-progress target completes
-(define (force-build t state)
+(define (force-build t state seen)
(define current (target-state state t))
(cond
[(channel? current)
- ;; Waiting on the channel might block, so relinquish state
- (put-state (build-state-ch state) state "force")
- (channel-get current)
- ;; Put value back to potentially satisfy some other waiting thread:
- (channel-put current 'still-done)
- (get-state (build-state-ch state) "force")]
+ (define (ready-again)
+ ;; Put value back to potentially satisfy some other waiting thread:
+ (channel-put current 'still-done))
+ (cond
+ [(channel-try-get current)
+ (ready-again)
+ state]
+ [else
+ ;; Waiting on the channel might block, so relinquish state
+ (define th-key (string->uninterned-symbol "thread"))
+ (define new-state (check/add-blocking-dependency state seen t th-key))
+ (put-state (build-state-ch new-state) new-state "force")
+ (channel-get current)
+ (ready-again)
+ (remove-blocking-dependency (get-state (build-state-ch new-state) "force") th-key)])]
[else state]))
;; Shortcut for plain inputs, otherwise starts a build
@@ -394,12 +406,13 @@
(define rule-state (get-state (token-ch tok) "rule"))
;; trigger builds of dependencies, but don't want for them to complete
- (for-each (make-fetch-dep rule-state new-seen dep-top?) deps)
+ (define fetch-state
+ (foldl (make-fetch-dep rule-state new-seen dep-top?) rule-state deps))
;; now that they're all potentially started, wait for completions
(define new-state
(foldl (lambda (dep state) (do-build dep state new-seen dep-top?))
- rule-state
+ fetch-state
deps))
;; extract results, assemble in a hash table: <rel-path> -> <sha256>
@@ -439,10 +452,11 @@
'()
(hash-keys prev-dep-sha256s))
'()))
- (for-each (make-fetch-dep new-state new-seen dep-top?) more-deps)
+ (define fetch-again-state
+ (foldl (make-fetch-dep new-state new-seen dep-top?) new-state more-deps))
(define newer-state
(foldl (lambda (dep state) (do-build dep state new-seen dep-top?))
- new-state
+ fetch-again-state
more-deps))
(define all-dep-sha256s
(foldl (lambda (dep dep-sha256s)
@@ -503,29 +517,31 @@
(build-one (lambda (proc) (proc (get-state state-ch "build"))))]
[else
;; run build in its own thread
- (thread (lambda ()
- (acquire-resource newer-state path) ; limits process parallelism
- (build-one
- (lambda (proc)
- (release-resource newer-state path)
- (let* ([state (get-state state-ch "tbuild")]
- [state (proc state)])
- (put-state state-ch state "tbuild"))))))
- newer-state]))]))
+ (thread* newer-state
+ (lambda ()
+ (acquire-resource newer-state path) ; limits process parallelism
+ (build-one
+ (lambda (proc)
+ (release-resource newer-state path)
+ (let* ([state (get-state state-ch "tbuild")]
+ [state (proc state)])
+ (put-state state-ch (thread-done state) "tbuild"))))))]))]))
(define (make-fetch-dep state seen top?)
(if (build-state-uni? state)
- void
+ (lambda (dep state)
+ state)
(let ([state-ch (build-state-ch state)])
- (lambda (dep)
+ (lambda (dep state)
;; No one waits for the this thread's work, and it doesn't
;; actually use the build result. It just creates a demand for
;; the build result, so the demand exists concurrent to waiting
;; on dependencies.
- (thread (lambda ()
- (let* ([state (get-state state-ch "fetch")]
- [state (do-build dep state seen top?)])
- (put-state state-ch state "fetch"))))))))
+ (thread* state
+ (lambda ()
+ (let* ([state (get-state state-ch "fetch")]
+ [state (do-build dep state seen top?)])
+ (put-state state-ch (thread-done state) "fetch"))))))))
;; Alternative entry point suitable for use from a script's `main`
(define (build/command-line targets [opts (hash)])
@@ -787,6 +803,66 @@
key)))
;; see also "private/build-db.zuo"
+
+;; ------------------------------------------------------------
+;; Cross-thread dependency-cycle checking
+
+;; Within a single thread, a `seen` table detects cycles, but that's
+;; not good enough for cycles across threads. To detect those cycles,
+;; record the `seen` chain plus `t` for a thread while it is blocking
+;; on `t`. If we run out of threads (i.e., deadlock), then we can
+;; construct a graph based on `seen` -> `t` dependendies and look for
+;; a cycle in the graph.
+
+(define (thread* state thunk)
+ (thread thunk)
+ (build-state-set-thread-count state (+ (build-state-thread-count state) 1)))
+
+(define (thread-done state)
+ (build-state-set-thread-count state (- (build-state-thread-count state) 1)))
+
+(define (check/add-blocking-dependency state seen t th-key)
+ (define new-seens (hash-set (build-state-thread-seens state) th-key (cons seen t)))
+ (define new-state (build-state-set-thread-seens state new-seens))
+ (define count (build-state-thread-count state))
+ (when (= count 1)
+ ;; potential deadlock, so look for cycle
+ (define edges
+ (foldl (lambda (k edges)
+ (define seen+t (hash-ref new-seens k))
+ (define t (cdr seen+t))
+ (foldl (lambda (k edges)
+ (hash-set edges k (cons t (hash-ref edges k '()))))
+ edges
+ (hash-keys (car seen+t))))
+ (hash)
+ (hash-keys new-seens)))
+ (foldl (lambda (k done)
+ (foldl (lambda (t done)
+ (let loop ([t t] [seen (hash)])
+ (define t-k (target-key t))
+ (cond
+ [(hash-ref done t-k #f) done]
+ [else
+ (when (hash-ref seen t-k #f)
+ (error "build: dependency cycle" (target-name t)))
+ (define new-seen (hash-set seen (target-key t) #t))
+ (define new-done
+ (foldl (lambda (t done)
+ (loop t new-seen))
+ done
+ (hash-ref edges (target-key t) '())))
+ ;; didn't detect cycle, so this token is ok
+ (hash-set new-done t-k #t)])))
+ done
+ (hash-ref edges k)))
+ (hash)
+ (hash-keys edges)))
+ (build-state-set-thread-count new-state (- count 1)))
+
+(define (remove-blocking-dependency state th-key)
+ (build-state-set-thread-seens (build-state-set-thread-count state (+ (build-state-thread-count state) 1))
+ (hash-remove (build-state-thread-seens state) th-key)))
;; ------------------------------------------------------------
;; Generic helpers
--- a/lib/zuo/thread.zuo
+++ b/lib/zuo/thread.zuo
@@ -8,6 +8,7 @@
channel?
channel-put
channel-get
+ channel-try-get
thread-process-wait)
(struct thread (id))
@@ -155,7 +156,7 @@
[else
(loop (ch-set-w-tl (ch-set-w-hd ch (reverse w-tl)) '()))]))])))))
-(define (raw-channel-get st chl k yield-k)
+(define (raw-channel-get st chl k yield-k [just-try? #f])
(let* ([chs (state-channels st)]
[ch (hash-ref chs (channel-id chl) #f)])
(unless ch (error "channel-get: does not belong to the running thread group" ch))
@@ -170,13 +171,17 @@
(let* ([tl (ch-tl ch)])
(cond
[(null? tl)
- (call/cc
- (lambda (k)
- (yield-k (update-ch st (ch-set-w-tl ch (cons k (ch-w-tl ch)))))))]
+ (cond
+ [just-try?
+ (yield-k st)]
+ [else
+ (call/cc
+ (lambda (k)
+ (yield-k (update-ch st (ch-set-w-tl ch (cons k (ch-w-tl ch)))))))])]
[else
(loop (ch-set-tl (ch-set-hd ch (reverse tl)) '()))]))])))))
-(define (channel-get chl)
+(define (do-channel-get who chl unavailable just-try?)
(unless (channel? chl) (arg-error 'channel-get "channel" chl))
(check-in-thread 'channel-get)
(raw-channel-get (current-state) chl
@@ -185,7 +190,14 @@
v)
(lambda (st)
(current-state st)
- (yield))))
+ (unavailable))
+ just-try?))
+
+(define (channel-get chl)
+ (do-channel-get 'channel-get chl yield #f))
+
+(define (channel-try-get chl)
+ (do-channel-get 'channel-try-get chl (lambda () #f) #t))
(define (thread-process-wait p . ps)
(for-each (lambda (p)
--- /dev/null
+++ b/tests/build-cycle.zuo
@@ -1,0 +1,43 @@
+#lang zuo
+(require "harness.zuo")
+
+;; This script is run by the "build.zuo" test
+
+(define (pause)
+ (thread-process-wait
+ (hash-ref
+ (process (hash-ref (runtime-env) 'exe)
+ "-c"
+ "#lang zuo/kernel (hash)")
+ 'process)))
+
+(define (touch fn)
+ (error "shouldn't get to touch")
+ (fd-close (fd-open-output fn :truncate)))
+
+(define (tmp fn)
+ (build-path tmp-dir fn))
+
+(define x (target (tmp "cycle-x")
+ (lambda (path token)
+ (pause)
+ (rule (list z)
+ (lambda ()
+ (touch path))))))
+
+(define y (target (tmp "cycle-y")
+ (lambda (path token)
+ (pause)
+ (rule (list x)
+ (lambda ()
+ (touch path))))))
+
+(define z (target (tmp "cycle-z")
+ (lambda (path token)
+ (pause)
+ (rule (list y)
+ (lambda ()
+ (touch path))))))
+
+;; should fail with cycle error:
+(build (list x y z) #f (hash 'jobs 3))
--- /dev/null
+++ b/tests/build.zuo
@@ -1,0 +1,19 @@
+#lang zuo
+
+(require "harness.zuo")
+
+(alert "build")
+
+(let ()
+ (define p (process (hash-ref (runtime-env) 'exe #f)
+ (at-source "build-cycle.zuo")
+ (hash 'stdin 'pipe 'stdout 'pipe 'stderr 'pipe)))
+ (fd-close (hash-ref p 'stdin))
+ (define out (fd-read (hash-ref p 'stdout) eof))
+ (define err (fd-read (hash-ref p 'stderr) eof))
+ (fd-close (hash-ref p 'stdout))
+ (fd-close (hash-ref p 'stderr))
+ (process-wait (hash-ref p 'process))
+
+ (check (glob-match? "*dependency cycle*" err) #t)
+ (check "" out))
--- a/tests/main.zuo
+++ b/tests/main.zuo
@@ -24,6 +24,7 @@
(require "config.zuo")
(require "c.zuo")
(require "cycle.zuo")
+(require "build.zuo")
(require "form.zuo")
(require "form-hygienic.zuo")
(require "macro.zuo")
--- a/tests/thread.zuo
+++ b/tests/thread.zuo
@@ -156,3 +156,9 @@
(lambda ()
((call/prompt (lambda () (call/cc (lambda (k) k)))) 0))))
"main thread is stuck")
+
+(check (channel-try-get (channel)) #f)
+(check (let ([ch (channel)])
+ (channel-put ch "x")
+ (channel-try-get ch))
+ "x")
--- a/zuo-doc/fake-zuo.rkt
+++ b/zuo-doc/fake-zuo.rkt
@@ -234,7 +234,7 @@
build-shell
call-in-main-thread
- thread? thread channel? channel channel-put channel-get
+ thread? thread channel? channel channel-put channel-get channel-try-get
thread-process-wait
config-file->hash
--- a/zuo-doc/zuo-lib.scrbl
+++ b/zuo-doc/zuo-lib.scrbl
@@ -139,10 +139,11 @@
@defproc[(channel? [v any/c]) boolean?]
@defproc[(channel-put [ch channel?] [v any/c]) channel?]
@defproc[(channel-get [ch channel?]) any/c]
+@defproc[(channel-try-get [ch channel?]) any/c]
)]{
Analogous to @realracket*[thread thread? make-channel channel? channel-put
-channel-get] from @racketmodname[racket], but channels are
+channel-get channel-try-get] from @racketmodname[racket], but channels are
asynchronous (with an unbounded queue) instead of synchronous.
Except for @racket[thread?] and @racket[channel?], these procedures
@@ -152,7 +153,9 @@
Beware that attempting to use these operations outside of a threading
context will @emph{not} necessarily trigger an error, and may instead
deliver an opaque threading request to the enclosing continuation
-prompt.}
+prompt.
+
+@history[#:changed "1.4" @elem{Added @racket[channel-try-get].}]}
@defproc[(thread-process-wait [process handle?] ...) handle?]{
--- a/zuo.c
+++ b/zuo.c
@@ -3,7 +3,7 @@
declarations. */
#define ZUO_VERSION 1
-#define ZUO_MINOR_VERSION 3
+#define ZUO_MINOR_VERSION 4
#if defined(_MSC_VER) || defined(__MINGW32__)
# define ZUO_WINDOWS