3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
41 $Id: meta2deps.py,v 1.40 2021/12/13 19:32:46 sjg Exp $
43 Copyright (c) 2011-2020, Simon J. Gerraty
44 Copyright (c) 2011-2017, Juniper Networks, Inc.
47 Redistribution and use in source and binary forms, with or without
48 modification, are permitted provided that the following conditions
50 1. Redistributions of source code must retain the above copyright
51 notice, this list of conditions and the following disclaimer.
52 2. Redistributions in binary form must reproduce the above copyright
53 notice, this list of conditions and the following disclaimer in the
54 documentation and/or other materials provided with the distribution.
56 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
57 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
58 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
59 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
60 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
62 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
63 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
64 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
65 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
66 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
72 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
74 Return an absolute path, resolving via cwd or last_dir if needed.
76 if path.endswith('/.'):
78 if len(path) > 0 and path[0] == '/':
79 if os.path.exists(path):
82 print("skipping non-existent:", path, file=debug_out)
86 if path.startswith('./'):
90 for d in [last_dir, cwd]:
99 p = '/'.join([d,path])
101 print("looking for:", p, end=' ', file=debug_out)
102 if not os.path.exists(p):
104 print("nope", file=debug_out)
108 print("found:", p, file=debug_out)
113 """cleanup path without using realpath(3)"""
114 if path.startswith('/'):
121 if not d or d == '.':
131 return r + '/'.join(p)
133 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
135 Return an absolute path, resolving via cwd or last_dir if needed.
136 this gets called a lot, so we try to avoid calling realpath.
138 rpath = resolve(path, cwd, last_dir, debug, debug_out)
141 elif len(path) > 0 and path[0] == '/':
143 if (path.find('/') < 0 or
144 path.find('./') > 0 or
145 path.endswith('/..')):
146 path = cleanpath(path)
149 def sort_unique(list, cmp=None, key=None, reverse=False):
150 if sys.version_info[0] == 2:
151 list.sort(cmp, key, reverse)
153 list.sort(reverse=reverse)
164 return ['/' + x + '/',
169 def target_spec_exts(target_spec):
170 """return a list of dirdep extensions that could match target_spec"""
172 if target_spec.find(',') < 0:
173 return ['.'+target_spec]
174 w = target_spec.split(',')
178 e.append('.'+','.join(w[0:n]))
183 """class to parse meta files generated by bmake."""
196 def __init__(self, name, conf={}):
197 """if name is set we will parse it now.
198 conf can have the following keys:
200 SRCTOPS list of tops of the src tree(s).
202 CURDIR the src directory 'bmake' was run from.
204 RELDIR the relative path from SRCTOP to CURDIR
206 MACHINE the machine we built for.
207 set to 'none' if we are not cross-building.
208 More specifically if machine cannot be deduced from objdirs.
211 Sometimes MACHINE isn't enough.
214 when we build for the pseudo machine 'host'
215 the object tree uses HOST_TARGET rather than MACHINE.
217 OBJROOTS a list of the common prefix for all obj dirs it might
220 DPDEPS names an optional file to which per file dependencies
222 For example if 'some/path/foo.h' is read from SRCTOP
223 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
224 This can allow 'bmake' to learn all the dirs within
225 the tree that depend on 'foo.h'
228 A list of paths to ignore.
229 ccache(1) can otherwise be trouble.
231 debug desired debug level
233 debug_out open file to send debug output to (sys.stderr)
238 self.debug = conf.get('debug', 0)
239 self.debug_out = conf.get('debug_out', sys.stderr)
241 self.machine = conf.get('MACHINE', '')
242 self.machine_arch = conf.get('MACHINE_ARCH', '')
243 self.target_spec = conf.get('TARGET_SPEC', self.machine)
244 self.exts = target_spec_exts(self.target_spec)
245 self.curdir = conf.get('CURDIR')
246 self.reldir = conf.get('RELDIR')
247 self.dpdeps = conf.get('DPDEPS')
251 # some of the steps below we want to do only once
253 self.host_target = conf.get('HOST_TARGET')
254 for srctop in conf.get('SRCTOPS', []):
255 if srctop[-1] != '/':
257 if not srctop in self.srctops:
258 self.srctops.append(srctop)
259 _srctop = os.path.realpath(srctop)
260 if _srctop[-1] != '/':
262 if not _srctop in self.srctops:
263 self.srctops.append(_srctop)
265 trim_list = add_trims(self.machine)
266 if self.machine == 'host':
267 trim_list += add_trims(self.host_target)
268 if self.target_spec != self.machine:
269 trim_list += add_trims(self.target_spec)
271 for objroot in conf.get('OBJROOTS', []):
273 if objroot.endswith(e):
274 # this is not what we want - fix it
275 objroot = objroot[0:-len(e)]
277 if objroot[-1] != '/':
279 if not objroot in self.objroots:
280 self.objroots.append(objroot)
281 _objroot = os.path.realpath(objroot)
282 if objroot[-1] == '/':
284 if not _objroot in self.objroots:
285 self.objroots.append(_objroot)
287 # we want the longest match
288 self.srctops.sort(reverse=True)
289 self.objroots.sort(reverse=True)
291 self.excludes = conf.get('EXCLUDES', [])
294 print("host_target=", self.host_target, file=self.debug_out)
295 print("srctops=", self.srctops, file=self.debug_out)
296 print("objroots=", self.objroots, file=self.debug_out)
297 print("excludes=", self.excludes, file=self.debug_out)
298 print("ext_list=", self.exts, file=self.debug_out)
300 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
302 if self.dpdeps and not self.reldir:
304 print("need reldir:", end=' ', file=self.debug_out)
306 srctop = self.find_top(self.curdir, self.srctops)
308 self.reldir = self.curdir.replace(srctop,'')
310 print(self.reldir, file=self.debug_out)
312 self.dpdeps = None # we cannot do it?
314 self.cwd = os.getcwd() # make sure this is initialized
315 self.last_dir = self.cwd
321 """reset state if we are being passed meta files from multiple directories."""
327 def dirdeps(self, sep='\n'):
329 return sep.strip() + sep.join(self.obj_deps)
331 def src_dirdeps(self, sep='\n'):
332 """return SRC_DIRDEPS"""
333 return sep.strip() + sep.join(self.src_deps)
335 def file_depends(self, out=None):
336 """Append DPDEPS_${file} += ${RELDIR}
337 for each file we saw, to the output file."""
340 for f in sort_unique(self.file_deps):
341 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
342 # these entries provide for reverse DIRDEPS lookup
343 for f in self.obj_deps:
344 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
346 def seenit(self, dir):
347 """remember that we have seen dir."""
350 def add(self, list, data, clue=''):
351 """add data to list if it isn't already there."""
355 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
357 def find_top(self, path, list):
358 """the logical tree may be split across multiple trees"""
360 if path.startswith(top):
362 print("found in", top, file=self.debug_out)
366 def find_obj(self, objroot, dir, path, input):
367 """return path within objroot, taking care of .dirdep files"""
369 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
370 if not ddep and os.path.exists(ddepf):
371 ddep = open(ddepf, 'r').readline().strip('# \n')
373 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
376 ddep = ddep[0:-len(e)]
380 # no .dirdeps, so remember that we've seen the raw input
383 if self.machine == 'none':
384 if dir.startswith(objroot):
385 return dir.replace(objroot,'')
387 m = self.dirdep_re.match(dir.replace(objroot,''))
390 dmachine = m.group(1)
391 if dmachine != self.machine:
392 if not (self.machine == 'host' and
393 dmachine == self.host_target):
395 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
396 ddep += '.' + dmachine
400 def try_parse(self, name=None, file=None):
401 """give file and line number causing exception"""
403 self.parse(name, file)
406 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
409 def parse(self, name=None, file=None):
410 """A meta file looks like:
412 # Meta data file "path"
417 -- filemon acquired metadata --
427 L "pid" "src" "target"
432 We go to some effort to avoid processing a dependency more than once.
433 Of the above record types only C,E,F,L,R,V and W are of interest.
436 version = 0 # unknown
441 cwd = self.last_dir = self.cwd
443 f = open(self.name, 'r')
451 self.seenit(self.curdir) # we ignore this
453 interesting = 'CEFLRV'
456 # ignore anything we don't care about
457 if not line[0] in interesting:
460 print("input:", line, end=' ', file=self.debug_out)
469 # we cannot ignore 'W' records
470 # as they may be 'rw'
474 self.cwd = cwd = self.last_dir = w[1]
475 self.seenit(cwd) # ignore this
477 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
483 pid_last_dir[last_pid] = self.last_dir
484 cwd = pid_cwd.get(pid, self.cwd)
485 self.last_dir = pid_last_dir.get(pid, self.cwd)
492 pid_last_dir[npid] = cwd
496 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
500 print("missing cwd=", cwd, file=self.debug_out)
501 if cwd.endswith('/.'):
503 self.last_dir = pid_last_dir[pid] = cwd
506 print("cwd=", cwd, file=self.debug_out)
509 if w[2] in self.seen:
511 print("seen:", w[2], file=self.debug_out)
515 # these are special, treat src as read and
517 self.parse_path(w[2].strip("'"), cwd, 'R', w)
518 self.parse_path(w[3].strip("'"), cwd, 'W', w)
524 self.parse_path(path, cwd, w[0], w)
530 def is_src(self, base, dir, rdir):
531 """is base in srctop"""
532 for dir in [dir,rdir]:
535 path = '/'.join([dir,base])
536 srctop = self.find_top(path, self.srctops)
539 self.add(self.file_deps, path.replace(srctop,''), 'file')
540 self.add(self.src_deps, dir.replace(srctop,''), 'src')
545 def parse_path(self, path, cwd, op=None, w=[]):
546 """look at a path for the op specified"""
551 # we are never interested in .dirdep files as dependencies
552 if path.endswith('.dirdep'):
554 for p in self.excludes:
555 if p and path.startswith(p):
557 print("exclude:", p, path, file=self.debug_out)
559 # we don't want to resolve the last component if it is
561 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
564 dir,base = os.path.split(path)
567 print("seen:", dir, file=self.debug_out)
569 # we can have a path in an objdir which is a link
570 # to the src dir, we may need to add dependencies for each
572 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
574 rdir = os.path.realpath(dir)
579 # now put path back together
580 path = '/'.join([dir,base])
582 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
584 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
586 print("skipping:", path, file=self.debug_out)
588 if os.path.isdir(path):
590 self.last_dir = path;
592 print("ldir=", self.last_dir, file=self.debug_out)
596 # finally, we get down to it
597 if dir == self.cwd or dir == self.curdir:
599 if self.is_src(base, dir, rdir):
605 for dir in [dir,rdir]:
608 objroot = self.find_top(dir, self.objroots)
612 ddep = self.find_obj(objroot, dir, path, w[2])
614 self.add(self.obj_deps, ddep, 'obj')
615 if self.dpdeps and objroot.endswith('/stage/'):
616 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
617 self.add(self.file_deps, sp, 'file')
619 # don't waste time looking again
624 def main(argv, klass=MetaFile, xopts='', xoptf=None):
625 """Simple driver for class MetaFile.
628 script [options] [key=value ...] "meta" ...
630 Options and key=value pairs contribute to the
631 dictionary passed to MetaFile.
634 add "SRCTOP" to the "SRCTOPS" list.
639 add "OBJROOT" to the "OBJROOTS" list.
654 # import Psyco if we can
655 # it can speed things up quite a bit
671 machine = os.environ['MACHINE']
673 conf['MACHINE'] = machine
674 machine_arch = os.environ['MACHINE_ARCH']
676 conf['MACHINE_ARCH'] = machine_arch
677 srctop = os.environ['SB_SRC']
679 conf['SRCTOPS'].append(srctop)
680 objroot = os.environ['SB_OBJROOT']
682 conf['OBJROOTS'].append(objroot)
689 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
692 conf['MACHINE_ARCH'] = a
698 conf['HOST_TARGET'] = a
700 if a not in conf['SRCTOPS']:
701 conf['SRCTOPS'].append(a)
705 if a not in conf['OBJROOTS']:
706 conf['OBJROOTS'].append(a)
714 conf['TARGET_SPEC'] = a
716 if a not in conf['EXCLUDES']:
717 conf['EXCLUDES'].append(a)
721 conf['debug'] = debug
723 # get any var=val assignments
728 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
744 debug_out = conf.get('debug_out', sys.stderr)
747 print("config:", file=debug_out)
748 print("psyco=", have_psyco, file=debug_out)
749 for k,v in list(conf.items()):
750 print("%s=%s" % (k,v), file=debug_out)
754 if a.endswith('.meta'):
755 if not os.path.exists(a):
758 elif a.startswith('@'):
759 # there can actually be multiple files per line
760 for line in open(a[1:]):
761 for f in line.strip().split():
762 if not os.path.exists(f):
769 print(m.src_dirdeps('\nsrc:'))
771 dpdeps = conf.get('DPDEPS')
773 m.file_depends(open(dpdeps, 'w'))
777 if __name__ == '__main__':
781 # yes, this goes to stdout
782 print("ERROR: ", sys.exc_info()[1])