So after a lot of optimizing, I have something that I think is (as far as scripts go) pretty performant. It's still way slower than I had hoped it would be (even on a 50-core VM), but I guess there isn't too much I can do about make taking its time (beyond maybe putting everything on a tmpfs, which I haven't tried yet). Anyway, now that it's somewhat fast, I run into problems which I think are related to process limits ("couldn't fork child process: Resource temporarily unavailable").
My ulimit for processes is already at the maximum of 1044, and it doesn't seem like I can raise proc.curproc.rlimit.maxproc.hard. Am I correct that to raise this I need to compile a new kernel, or is there maybe some other way to avoid running into those limits? I'm hitting them even at just 50 "threads", and I really wouldn't want to go lower.
In case it's interesting to anyone, here is the script I'm currently using (it's Tcl, or rather Jim, and needs a slightly patched jimtcl package with SQLite support to run; all the patch needs is the addition of CONFIGURE_ARGS+= --with-ext=sqlite3
in its Makefile):
#!/usr/pkg/bin/jimsh
# Root directory; the pkgsrc tree, the make binary, the log and the socket
# all live below it.
set g_path_base /home/user/mnt
# make binary that is exec'ed for show-var and can-be-built-here.
set g_path_make $g_path_base/pkg/bin/bmake
# Log file opened for writing by init.
set g_path_log $g_path_base/log/db_build.log
# SQLite database holding the pkg and dep tables.
set g_path_db /tmp/package.db
# SQLite database holding the var table used as a cache by pkg_var.
set g_path_tmpdb /tmp/tmp.db
# Path of the unix datagram socket used to hand package paths to workers.
set g_path_sock $g_path_base/sock
# Run-time configuration defaults; init overrides them from the command line.
set g_cfg [dict create \
threads 8 \
debug 0 \
mode multi \
update 0 \
append 0 \
]
# Log file handle (set by init) and the per-process buffer of pending lines.
set g_log {}
set g_log_buf {}
# NOTE(review): g_mode is not read anywhere in the visible script; the mode
# that is actually consulted lives in g_cfg.
set g_mode multi
# Database handles, filled in by init and again by pkg_thread.
set g_db {}
set g_tmpdb {}
# Recent trace entries recorded by _dbg_cb.
set g_dbg_log {}
# xtrace callback: records the traced command in g_dbg_log.
#
# Entries are lists of the form: type fn line result cmd arglist.  The result
# slot of an entry is filled in when the NEXT callback arrives, using that
# callback's res argument.  Calls originating from binary.tcl and the
# tracing helpers themselves are not recorded.
proc _dbg_cb { type fn line res cmd arglist } {
    global g_dbg_log

    if { $fn eq {binary.tcl} } {
        return
    }
    if { $cmd eq {dbg_logend} || $cmd eq {xtrace} } {
        return
    }

    # Length is sampled before trimming, exactly once.
    set count [llength $g_dbg_log]
    if { $count > 20 } {
        set g_dbg_log [lrange $g_dbg_log 1 end]
    }
    if { $count > 0 } {
        # Store the result that belongs to the previously recorded command.
        lset g_dbg_log end 3 $res
    }
    lappend g_dbg_log [list $type $fn $line {} $cmd $arglist]
}
# Renders the recorded trace entries as text, one line per entry, in the
# form: ## type fn:line cmd arglist ==> [result]
proc dbg_log {} {
    global g_dbg_log
    set lines {}
    foreach entry $g_dbg_log {
        lassign $entry type fn line res cmd arglist
        lappend lines "## $type $fn:$line $cmd $arglist ==> \[$res\]"
    }
    return [join $lines \n]
}
# Switches command tracing off by installing an empty xtrace callback.
proc dbg_logend {} {
    xtrace ""
}
# Returns the bytes of str as a lowercase hexadecimal string (two digits per
# byte, high nibble first).
proc str2hex { str } {
    binary scan $str H* hexdigits
    return $hexdigits
}
# Returns whatever the external sha256 program prints for str, which is fed
# to it on standard input.
#
# NOTE: the double quotes follow the << characters in the middle of a word,
# so Tcl treats them as literal characters; the program therefore receives
# the value wrapped in a pair of quote characters.  Digests are stored and
# compared elsewhere, so this exact form must not be changed.
proc str2hash { str } {
return [exec sha256 <<"$str"]
}
# Recursive worker for cmbwrd.
#
# values - list of words to combine
# depth  - index of the word currently being decided
# mask   - bit i is set when word i has been selected so far
#
# Returns a list of sub-lists.  For every word the branch WITHOUT the word is
# emitted before the branch WITH it, and selected words keep their original
# order inside each sub-list.
proc _cmbwrd { values depth mask } {
    set total [llength $values]

    if { $depth >= $total } {
        # All words decided: materialise the selection described by mask.
        set picked {}
        for { set bit 0 } { $bit < $total } { incr bit } {
            if { $mask & (1 << $bit) } {
                lappend picked [lindex $values $bit]
            }
        }
        return [list $picked]
    }

    set next [expr { $depth + 1 }]
    set without [_cmbwrd $values $next $mask]
    set with [_cmbwrd $values $next [expr { $mask | (1 << $depth) }]]
    return [list {*}$without {*}$with]
}
# Returns every subset of values (including the empty one) as a list of
# lists; see _cmbwrd for the ordering.
proc cmbwrd { values } {
    _cmbwrd $values 0 0
}
# Emits one log message.
#
# In single mode the message goes to stdout and to the log file immediately.
# In any other mode it is only appended to g_log_buf; out_flush writes the
# buffer later.
proc out_print { msg } {
    global g_log g_log_buf g_cfg
    switch -- [dict get $g_cfg mode] {
        single {
            puts $msg
            $g_log puts $msg
            $g_log flush
            return
        }
        default {
            lappend g_log_buf $msg
        }
    }
}
# Writes all buffered messages to stdout and to the log file as one unit.
#
# Does nothing in single mode (out_print already wrote the messages) and
# nothing when the buffer is empty, so no blank line is emitted and the log
# lock is not taken needlessly.  The log file lock is held while writing and
# is released even when a write fails.
proc out_flush {} {
    global g_log g_log_buf g_cfg
    if { [dict get $g_cfg mode] == {single} } return
    if { [llength $g_log_buf] == 0 } return

    set s [join $g_log_buf \n]
    set g_log_buf {}

    $g_log lock -wait
    try {
        puts $s
        $g_log puts $s
        $g_log flush
    } finally {
        $g_log unlock
    }
}
# Returns the value of make variable `name` for the package at `path`.
#
# path - package directory relative to the pkgsrc tree, e.g. category/pkg
# name - make variable to query through the show-var target
# args - extra words passed to make (for example PKG_OPTIONS.x=...)
#
# Values are cached in the var table of the temporary database, keyed by
# path, name and a hash of the extra arguments.  On a cache miss make is run
# and its output is stored as a hex-encoded blob.  If make fails, the text
# "!! process died: <error>" is returned and nothing is cached.
#
# Text values are handed to the database through %s placeholders so that the
# sqlite3 extension escapes them; this keeps quotes and percent signs in a
# value from corrupting the statement.
proc pkg_var { path name args } {
    global g_tmpdb g_path_make g_path_base
    set h [str2hash [join $args { }]]

    $g_tmpdb query {begin deferred}
    set rows [$g_tmpdb query \
        {select value from var where path='%s' and name='%s' and opt='%s' limit 1} \
        $path $name $h]
    $g_tmpdb query commit

    if { [llength $rows] > 0 } {
        return [dict get [lindex $rows 0] value]
    }

    # Cache miss: ask make.  The trailing tee and stderr redirect are part of
    # the original invocation and are kept unchanged.
    if { [catch {
        exec $g_path_make -C $g_path_base/pkgsrc/$path \
            show-var VARNAME=$name {*}$args | tee /dev/null 2>/dev/null
    } v] } {
        return "!! process died: $v"
    }

    $g_tmpdb query {begin immediate}
    if { [catch {
        $g_tmpdb query \
            "insert into var values ( '%s', '%s', '%s', X'[str2hex $v]' )" \
            $path $name $h
    }] } {
        out_print " ^ inserting cached variable '$name' failed for '$path'"
    }
    $g_tmpdb query commit
    return $v
}
# Returns the package name: PKGNAME with everything from the last hyphen on
# removed.  A PKGNAME without a hyphen is returned unchanged.
proc pkg_name { path } {
    set pkgname [pkg_var $path PKGNAME]
    return [regsub {^(.*)-[^-]*$} $pkgname {\1}]
}
# Returns the package version: the part of PKGNAME after its last hyphen.
# A PKGNAME without a hyphen is returned unchanged.
proc pkg_ver { path } {
    set pkgname [pkg_var $path PKGNAME]
    return [regsub {^.*-([^-]*)$} $pkgname {\1}]
}
# Returns the category, i.e. the first component of a two-component
# category/package path.  Any other input is returned unchanged.
proc pkg_cat { path } {
    regsub {^([^/]*)/[^/]*$} $path {\1}
}
# Returns the COMMENT make variable of the package at path.
proc pkg_com { path } {
    pkg_var $path COMMENT
}
# Returns the LICENSE make variable of the package at path.
proc pkg_lic { path } {
    pkg_var $path LICENSE
}
# Returns the contents of the package's DESCR file, or an empty string when
# the file does not exist.
proc pkg_dsc { path } {
    global g_path_base
    set descr $g_path_base/pkgsrc/$path/DESCR
    if { [file exists $descr] } {
        set fh [open $descr r]
        set text [read $fh]
        close $fh
        return $text
    }
    return {}
}
# Parses one of the package's dependency variables.
#
# path - package directory relative to the pkgsrc tree
# type - make variable to read (DEPENDS, TOOL_DEPENDS, BUILD_DEPENDS)
# opts - optional list of package options to evaluate the variable with
#
# Every whitespace-separated entry is expected to look like pattern:dir.
# The last two components of dir identify the dependency, whose package name
# is looked up with pkg_name.  Returns a list of "name constraint" strings,
# where constraint is the pattern with the name prefix (and a following
# hyphen) removed, or *any when nothing remains.
#
# The name is stripped as a literal prefix rather than as a regular
# expression, so names containing regex metacharacters can neither mis-match
# nor raise an error.
proc _pkg_dep { path type { opts {} } } {
    if { $opts != {} } {
        # Same space-joined form that opt_test passes to make.
        set opts [list "PKG_OPTIONS.[pkg_name $path]=[join $opts { }]"]
    }
    set d {}
    foreach l [split [pkg_var $path $type {*}$opts] { }] {
        if { $l eq {} } continue
        set fields [split $l :]
        set pat [lindex $fields 0]
        set n [pkg_name [regsub {^.*/([^/]*/[^/]*)$} [lindex $fields 1] {\1}]]

        # Drop the package name when the pattern literally starts with it.
        set v $pat
        set len [string length $n]
        if { $len > 0 && [string range $pat 0 [expr { $len - 1 }]] eq $n } {
            set v [string range $pat $len end]
        }

        if { [string index $v 0] eq {-} } {
            set v [string range $v 1 end]
        } elseif { $v eq {} } {
            set v *any
        }
        lappend d "$n $v"
    }
    return $d
}
# Returns the parsed DEPENDS entries of the package, optionally evaluated
# with the given package options.
proc pkg_dep { path { opt {} } } {
    _pkg_dep $path DEPENDS $opt
}
# Returns the parsed TOOL_DEPENDS entries of the package, optionally
# evaluated with the given package options.
proc pkg_deptool { path { opt {} } } {
    _pkg_dep $path TOOL_DEPENDS $opt
}
# Returns the parsed BUILD_DEPENDS entries of the package, optionally
# evaluated with the given package options.
proc pkg_depbld { path { opt {} } } {
    _pkg_dep $path BUILD_DEPENDS $opt
}
# Returns the package's PKG_SUPPORTED_OPTIONS as a space-separated string,
# sorted and with duplicates removed.
proc pkg_optall { path } {
    set supported [split [pkg_var $path PKG_SUPPORTED_OPTIONS] { }]
    set unique [lsort -unique $supported]
    return [join $unique { }]
}
# Returns the PKG_OPTIONS make variable of the package at path.
proc pkg_optdef { path } {
    pkg_var $path PKG_OPTIONS
}
# Normalises an option list.
#
# With path set to a single hyphen the list is only sorted and deduplicated.
# Otherwise every option that appears in the package's PKG_OPTIONS value,
# either as given or with one leading hyphen removed, is dropped before
# sorting and deduplicating.
proc pkg_optnorm { path opts } {
    if { $path eq {-} } {
        return [lsort -unique $opts]
    }

    # The supported-options lookup is performed as before; its result is not
    # used by this procedure.
    set supported [pkg_optall $path]
    set defaults [pkg_optdef $path]

    set kept {}
    foreach opt $opts {
        set bare [regsub {^-} $opt {}]
        if { [lsearch $defaults $opt] < 0 && [lsearch $defaults $bare] < 0 } {
            lappend kept $opt
        }
    }
    return [lsort -unique $kept]
}
# Builds the list of togglable options: every option found in def is
# prefixed with a hyphen, all others are kept as they are.  The result is
# sorted.
proc pkg_optlst { opts def } {
    set marked {}
    foreach opt $opts {
        if { [lsearch $def $opt] >= 0 } {
            lappend marked -$opt
        } else {
            lappend marked $opt
        }
    }
    return [lsort $marked]
}
# Returns the hash identifying an option set, or the marker *def when the
# hash equals the well-known constant below.
# NOTE(review): the constant is presumably the digest str2hash produces for
# an empty option list -- confirm against the sha256 tool in use.
proc pkg_opthash { path opts } {
    set digest [str2hash [pkg_optnorm $path $opts]]
    if { $digest eq {12ae32cb1ec02d01eda3581b127c1fee3b0dc53572ed6baf239721a03d82e126} } {
        return *def
    }
    return $digest
}
# Checks whether the package can be built with the given options.
#
# path - package directory relative to the pkgsrc tree
# opts - optional list of package options
#
# Runs the can-be-built-here make target.  Returns an empty string when the
# first output line is yes.  Otherwise returns the failure reason: all
# output lines following the first line that mentions PKG_FAIL_REASON,
# joined with single spaces and with runs of blanks and tabs collapsed to
# one space.  Returns "unknown reason" when no such line exists and
# "process died: <error>" when make itself fails.
proc opt_test { path { opts {} } } {
    global g_path_base g_path_make
    if { $opts != {} } {
        set opts [list "PKG_OPTIONS.[pkg_name $path]=[join $opts { }]"]
    }
    if { [catch {
        exec $g_path_make -C $g_path_base/pkgsrc/$path \
            can-be-built-here {*}$opts | tee /dev/null 2>/dev/null
    } c] } {
        return "process died: $c"
    }

    set c [split $c \n]
    if { [lindex $c 0] == {yes} } {
        return {}
    }

    set s 0
    set r {}
    foreach l $c {
        if { $s } {
            append r " $l"
        } elseif { [string first PKG_FAIL_REASON $l] != -1 } {
            set s 1
        }
    }
    if { !$s } {
        return {unknown reason}
    }

    # Collapse whitespace runs.  The pattern requires at least one character
    # so that it can never match the empty string between two letters.
    return [regsub -all "\[ \t\]+" [string trim $r] { }]
}
# Scans one package and stores the result in the pkg and dep tables.
#
# path - package directory relative to the pkgsrc tree
#
# Append mode skips packages that already have a pkg row.  Update mode skips
# packages whose stored version equals the current one and replaces the rows
# of packages whose version changed.  For every combination of the
# package's togglable options the build check is run and, when it passes,
# the three dependency lists are collected; one dep row is written per
# combination (in batches of 500) and finally one pkg row for the package.
#
# Text values are handed to the database through %s placeholders so that the
# sqlite3 extension escapes them; free-form texts are stored as hex-encoded
# blobs as before.
proc pkg_scan { path } {
    global g_db g_cfg
    out_print "-- scanning '$path' ..."

    if { [dict get $g_cfg append] } {
        $g_db query {begin deferred}
        set r [$g_db query {select count(*) from pkg where path='%s'} $path]
        $g_db query commit
        if { [dict get [lindex $r 0] count(*)] } {
            out_print " & append: path '$path' exists"
            return
        }
        # Remove leftovers of an earlier, incomplete scan of this path.
        $g_db query {begin immediate}
        $g_db query {delete from dep where path='%s'} $path
        $g_db query commit
    }

    set ver [pkg_ver $path]
    if { [dict get $g_cfg update] } {
        $g_db query {begin deferred}
        set r [$g_db query {select version from pkg where path='%s'} $path]
        $g_db query commit
        if { [llength $r] } {
            if { [set v [dict get [lindex $r 0] version]] == $ver } {
                out_print \
                    " & update: path '$path' exists with known version '$ver'"
                return
            }
            out_print \
                " & update: updated version for path '$path' ( '$v' > '$ver' )"
            $g_db query {begin immediate}
            $g_db query {delete from pkg where path='%s'} $path
            $g_db query {delete from dep where path='%s'} $path
            $g_db query commit
        }
    }

    set name [pkg_name $path]
    set cat [pkg_cat $path]
    set lic [pkg_lic $path]
    set com [pkg_com $path]
    set dsc [pkg_dsc $path]
    set optall [pkg_optall $path]
    set optdef [pkg_optdef $path]
    set optlst [pkg_optlst $optall $optdef]
    out_print " | info = $path | $name | $ver | $com | $lic | $optall | $optdef | $optlst | $com"

    set var 0
    set cnt 0
    # Pending dep rows: one value template per row plus the flat list of
    # placeholder arguments belonging to those templates.
    set dval {}
    set dargs {}
    foreach opts [cmbwrd $optlst] {
        set hash [pkg_opthash - $opts]
        out_print " . trying options '$opts' for '$path' ($hash) ..."
        if { [set reason [opt_test $path $opts]] != {} } {
            out_print " ! options '$opts' failed for '$path' = $reason"
            set dep {}
            set depbld {}
            set deptool {}
        } else {
            out_print " > validated options '$opts' for '$path' ($hash)"
            set dep [join [pkg_dep $path $opts] :]
            set depbld [join [pkg_depbld $path $opts] :]
            set deptool [join [pkg_deptool $path $opts] :]
            out_print " ~ dep = $dep | $depbld | $deptool"
            incr var
        }
        incr cnt

        lappend dval \
            "( '%s', '%s', '%s', '%s', '%s', '%s', '%s', X'[str2hex $reason]' )"
        lappend dargs $path $name $opts $hash $dep $depbld $deptool

        if { [llength $dval] == 500 } {
            $g_db query {begin immediate}
            $g_db query "insert into dep values [join $dval ,]" {*}$dargs
            $g_db query commit
            set dval {}
            set dargs {}
        }
    }

    # Remaining dep rows and the pkg row go into one transaction.
    $g_db query {begin immediate}
    if { [llength $dval] } {
        $g_db query "insert into dep values [join $dval ,]" {*}$dargs
    }
    set q "insert into pkg values ( '%s', '%s', '%s', $cnt, $var,"
    set q "$q '%s', '%s', '%s', '%s', '%s',"
    set q "$q X'[str2hex $com]', X'[str2hex $dsc]' )"
    $g_db query $q $path $name $ver $cat $lic $optall $optdef $optlst
    $g_db query commit
}
# Worker body: scans packages until the work runs out.
#
# idx  - zero-based worker number, used only in log messages (printed + 1)
# pkgs - list of package paths; consulted in single mode only
#
# In single mode the paths in pkgs are scanned one after another.  In any
# other mode the worker requests each path from the server socket and stops
# when it receives _exit_.  Any error raised inside the main body is caught
# and reported through out_print.
proc pkg_thread { idx { pkgs {} } } {
global g_cfg g_db g_path_db g_tmpdb g_path_tmpdb g_path_sock
# Fixed two-second delay before the worker starts; the reason is not
# evident from this procedure.
sleep 2
if { [catch {
if { [dict get $g_cfg mode] != {single} } {
set s [socket -async unix.dgram $g_path_sock]
} else {
set i 0
}
# Every worker opens its own handles on both databases.
set g_db [sqlite3.open $g_path_db]
$g_db query {PRAGMA busy_timeout = 99999}
$g_db query {PRAGMA journal_mode = WAL}
set g_tmpdb [sqlite3.open $g_path_tmpdb]
$g_tmpdb query {PRAGMA busy_timeout = 99999}
$g_tmpdb query {PRAGMA journal_mode = WAL}
while 1 {
if { [dict get $g_cfg mode] != {single} } {
# Request the next path by sending a single byte to the server.
if { [catch {$s puts -nonewline x}] } {
out_print \
"++ server socket unavailable for thread [expr $idx + 1]"
break
}
# Poll for the reply; a read error whose error code is not NONE ends
# the worker, an empty read is retried after a short sleep.
while 1 {
if { [catch {$s read} path opts] &&
[dict get $opts -errorcode] != {NONE}
} {
out_print \
"++ server socket unavailable for thread [expr $idx + 1]"
break 2
}
if { $path != {} } break
sleep 0.1
update
}
# The server answers _exit_ once no paths are left.
if { $path == {_exit_} } break
out_print "?? thread [expr $idx + 1]: $path"
out_flush
out_print "== thread [expr $idx + 1] ..."
} else {
if { $i == [llength $pkgs] } break
set path [lindex $pkgs $i]
incr i
}
# Command tracing is active only while the package is being scanned.
if { [dict get $g_cfg debug] == 1 } { xtrace _dbg_cb }
pkg_scan $path
if { [dict get $g_cfg debug] == 1 } { xtrace {} }
out_flush
}
if { [dict get $g_cfg mode] != {single} } {
out_print "__ thread [expr $idx + 1] finished"
}
out_flush
} msg opts] } {
# Error path: stop tracing and report the failure.
dbg_logend
out_print "## thread [expr $idx + 1] exited"
if { [dict exists $opts -errorcode] } {
out_print "## $msg"
out_print {##}
set s [dict get $opts -errorinfo]
# The error info is walked in groups of three elements: the first is
# printed as the scope, the other two as a colon-separated pair
# (presumably file and line -- confirm against the Jim version in use).
for { set i 0 } { $i < [llength $s] } { incr i 3 } {
if { [set p [lindex $s $i]] == {} } {
set p {$GLOBAL$}
}
out_print \
"## [lindex $s [expr $i + 1]]:[lindex $s [expr $i + 2]] scope: $p"
}
if { [dict get $g_cfg debug] == 1 } {
out_print "##\n[dbg_log]"
}
}
out_flush
}
}
# Prints the command line help to stderr and terminates the process with
# exit status 111.
proc usage {} {
    global argv0
    set help [list \
        "Usage: $argv0 ..." \
        { -h : display this help screen | opt |} \
        { -m [VALUE] : 'single' or 'multi' | opt | multi} \
        { -t [VALUE] : number of threads in multi mode | opt | 8} \
        { -u : update database (conflicts with -a) | opt | false} \
        { -a : append database (conflicts with -u) | opt | false} \
        { -d : enable debug output | opt | false} \
        {}]
    foreach line $help {
        puts stderr $line
    }
    exit 111
}
# Parses the command line and prepares log file and databases.
#
# argv - list of command line arguments
#
# Recognised options: -t N (threads), -m single|multi (mode), -d (debug),
# -u (update) and -a (append); -u and -a exclude each other.  Anything else,
# a missing option value, an unknown mode or a thread count that is not a
# positive integer ends the program through usage.
#
# Removes the old log, socket and cache database and opens fresh ones.  The
# package database is removed and its schema created unless update or append
# mode is active.  The -wal and -shm companions of a removed database are
# removed with it, so a leftover write-ahead log from an earlier run can
# never be applied to the newly created file.
proc init { argv } {
    global g_path_db g_db g_path_tmpdb g_tmpdb g_path_log g_log g_cfg \
        g_path_sock

    set optv [dict create -t threads -m mode]
    for { set i 0; set c [llength $argv] } { $i < $c } { incr i } {
        set o [lindex $argv $i]
        if { [dict exists $optv $o] } {
            # Options with a value need one more argument.
            if { $i == $c - 1 } usage
            dict set g_cfg [dict get $optv $o] \
                [lindex $argv [expr { $i + 1 }]]
            incr i
            continue
        }
        switch -exact -- $o {
            -d { dict set g_cfg debug 1 }
            -u {
                if { [dict get $g_cfg append] } usage
                dict set g_cfg update 1
            }
            -a {
                if { [dict get $g_cfg update] } usage
                dict set g_cfg append 1
            }
            default usage
        }
    }

    if { [lsearch -exact {single multi} [dict get $g_cfg mode]] < 0 } usage
    set threads [dict get $g_cfg threads]
    if { ![string is integer -strict $threads] || $threads < 1 } usage

    set fresh [expr { ![dict get $g_cfg update] && ![dict get $g_cfg append] }]

    file delete -f $g_path_log $g_path_sock \
        $g_path_tmpdb ${g_path_tmpdb}-wal ${g_path_tmpdb}-shm \
        {*}[glob -nocomplain /tmp/tcl.*]
    if { $fresh } {
        file delete -f $g_path_db ${g_path_db}-wal ${g_path_db}-shm
    }

    set g_log [open $g_path_log w]

    set g_db [sqlite3.open $g_path_db]
    $g_db query {PRAGMA busy_timeout = 99999}
    $g_db query {PRAGMA journal_mode = WAL}
    if { $fresh } {
        $g_db query {begin immediate}
        $g_db query {
            create table pkg (
                path text,
                name text,
                version text,
                count int,
                variants int,
                category text,
                license text,
                optsup text,
                optdef text,
                optlist text,
                comment text,
                descr text
            )
        }
        $g_db query {create unique index pkg_uid1 ON pkg( path )}
        $g_db query {create unique index pkg_uid2 ON pkg( path, name )}
        $g_db query {create unique index pkg_uid3 ON pkg( path, version )}
        $g_db query {create unique index pkg_uid4 ON pkg( path, name, version )}
        $g_db query {
            create table dep (
                path text,
                name text,
                opt text,
                hash text,
                deppkg text,
                depbuild text,
                deptool text,
                error text
            )
        }
        $g_db query {create unique index dep_uid1 ON dep( path, opt )}
        $g_db query {create unique index dep_uid2 ON dep( path, hash )}
        $g_db query {create unique index dep_uid3 ON dep( path, name, opt )}
        $g_db query {create unique index dep_uid4 ON dep( path, name, hash )}
        $g_db query commit
    }

    # The variable cache always starts empty.
    set g_tmpdb [sqlite3.open $g_path_tmpdb]
    $g_tmpdb query {PRAGMA busy_timeout = 99999}
    $g_tmpdb query {PRAGMA journal_mode = WAL}
    $g_tmpdb query {begin immediate}
    $g_tmpdb query {
        create table var (
            path text,
            name text,
            opt text,
            value text
        )
    }
    $g_tmpdb query {create unique index var_uid1 ON var( path, name, opt )}
    $g_tmpdb query commit
}
# Closes the log file and announces completion on stdout.
proc cleanup {} {
    global g_log
    close $g_log
    puts "__ finished"
}
# Start-up: parse the command line, then collect every category/package
# directory that has a Makefile exactly three levels below the pkgsrc root.
init $argv
set makefiles [split \
    [exec find $g_path_base/pkgsrc -type f \
        -name Makefile -mindepth 3 -maxdepth 3] \n]
set paths {}
foreach makefile $makefiles {
    # Reduce .../category/package/Makefile to category/package.
    lappend paths [regsub {^.*/([^/]*/[^/]*)/Makefile$} $makefile {\1}]
}
# Run the scan: in single mode directly in this process, otherwise through
# forked workers that are fed package paths over a unix datagram socket.
if { [dict get $g_cfg mode] == {single} } {
pkg_thread -1 $paths
} else {
# NOTE(review): g_pkg_index is not read anywhere in the visible script.
set g_pkg_index 0
set s [socket -async unix.dgram.server $g_path_sock]
# Receive buffer is sized at 1024 bytes per configured worker.
$s sockopt rcvbuf [expr [dict get $g_cfg threads] * 1024]
for { set i 0 } { $i < [dict get $g_cfg threads] } { incr i } {
puts "## launching scan thread [expr $i + 1] ..."
# The child process runs the worker body and exits; the parent keeps
# forking.
if { ![os.fork] } {
pkg_thread $i
exit 0
}
}
# Dispatcher loop: i counts reaped children, j is the index of the next
# path to hand out.  The loop ends once as many children have been reaped
# as workers were configured.
for { set i 0; set j 0 } { $i < [dict get $g_cfg threads] } {} {
if { [lindex [wait -nohang -1] 0] != {NONE} } {
incr i
}
# At most one pending request is answered per iteration, either with the
# next path or with _exit_ when all paths have been handed out.
if { ![catch {$s recvfrom 1 a}] } {
if { $j >= [llength $paths] } {
set r _exit_
} else {
set r [lindex $paths $j]
incr j
}
$s sendto $r $a
}
sleep 0.1
update
}
$s close
}
cleanup
exit 0