4 This script parses each "meta" file and extracts the
5 information needed to deduce build and src dependencies.
7 It works much the same as the original shell script, but is
10 The parsing work is handled by the class MetaFile.
11 We only pay attention to a subset of the information in the
12 "meta" files. Specifically:
14 'CWD' to initialize our notion.
16 'C' to track chdir(2) on a per process basis
18 'R' files read are what we really care about.
19 directories read, provide a clue to resolving
20 subsequent relative paths. That is if we cannot find
21 them relative to 'cwd', we check relative to the last
24 'W' files opened for write or read-write,
25 for filemon V3 and earlier.
31 'V' the filemon version, this record is used as a clue
32 that we have reached the interesting bit.
38 $Id: meta2deps.py,v 1.13 2013/05/11 05:16:26 sjg Exp $
40 Copyright (c) 2011-2013, Juniper Networks, Inc.
43 Redistribution and use in source and binary forms, with or without
44 modification, are permitted provided that the following conditions
46 1. Redistributions of source code must retain the above copyright
47 notice, this list of conditions and the following disclaimer.
48 2. Redistributions in binary form must reproduce the above copyright
49 notice, this list of conditions and the following disclaimer in the
50 documentation and/or other materials provided with the distribution.
52 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
53 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
54 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
55 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
56 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
62 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 def getv(dict, key, d=None):
69 """Lookup key in dict and return value or the supplied default."""
74 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
76 Return an absolute path, resolving via cwd or last_dir if needed.
78 if path.endswith('/.'):
84 if path.startswith('./'):
88 for d in [last_dir, cwd]:
91 p = '/'.join([d,path])
93 print >> debug_out, "looking for:", p,
94 if not os.path.exists(p):
96 print >> debug_out, "nope"
100 print >> debug_out, "found:", p
104 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
106 Return an absolute path, resolving via cwd or last_dir if needed.
107 this gets called a lot, so we try to avoid calling realpath
108 until we know we have something.
110 path = resolve(path, cwd, last_dir, debug, debug_out)
111 if path and (path.find('./') > 0 or
112 path.endswith('/..') or
113 os.path.islink(path)):
114 return os.path.realpath(path)
117 def sort_unique(list, cmp=None, key=None, reverse=False):
118 list.sort(cmp, key, reverse)
128 return ['/' + x + '/',
134 """class to parse meta files generated by bmake."""
147 def __init__(self, name, conf={}):
148 """if name is set we will parse it now.
149 conf can have the following keys:
151 SRCTOPS list of tops of the src tree(s).
153 CURDIR the src directory 'bmake' was run from.
155 RELDIR the relative path from SRCTOP to CURDIR
157 MACHINE the machine we built for.
158 set to 'none' if we are not cross-building.
159 More specifically if machine cannot be deduced from objdirs.
162 Sometimes MACHINE isn't enough.
165 when we build for the pseudo machine 'host'
166 the object tree uses HOST_TARGET rather than MACHINE.
168 OBJROOTS a list of the common prefix for all obj dirs it might
171 DPDEPS names an optional file to which per file dependencies
173 For example if 'some/path/foo.h' is read from SRCTOP
174 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
175 This can allow 'bmake' to learn all the dirs within
176 the tree that depend on 'foo.h'
178 debug desired debug level
180 debug_out open file to send debug output to (sys.stderr)
185 self.debug = getv(conf, 'debug', 0)
186 self.debug_out = getv(conf, 'debug_out', sys.stderr)
188 self.machine = getv(conf, 'MACHINE', '')
189 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
190 self.target_spec = getv(conf, 'TARGET_SPEC', '')
191 self.curdir = getv(conf, 'CURDIR')
192 self.reldir = getv(conf, 'RELDIR')
193 self.dpdeps = getv(conf, 'DPDEPS')
196 # some of the steps below we want to do only once
198 self.host_target = getv(conf, 'HOST_TARGET')
199 for srctop in getv(conf, 'SRCTOPS', []):
200 if srctop[-1] != '/':
202 if not srctop in self.srctops:
203 self.srctops.append(srctop)
204 _srctop = os.path.realpath(srctop)
205 if _srctop[-1] != '/':
207 if not _srctop in self.srctops:
208 self.srctops.append(_srctop)
210 trim_list = add_trims(self.machine)
211 if self.machine == 'host':
212 trim_list += add_trims(self.host_target)
214 trim_list += add_trims(self.target_spec)
216 for objroot in getv(conf, 'OBJROOTS', []):
218 if objroot.endswith(e):
219 # this is not what we want - fix it
220 objroot = objroot[0:-len(e)]
223 if not objroot in self.objroots:
224 self.objroots.append(objroot)
225 _objroot = os.path.realpath(objroot)
226 if objroot[-1] == '/':
228 if not _objroot in self.objroots:
229 self.objroots.append(_objroot)
231 # we want the longest match
232 self.srctops.sort(reverse=True)
233 self.objroots.sort(reverse=True)
236 print >> self.debug_out, "host_target=", self.host_target
237 print >> self.debug_out, "srctops=", self.srctops
238 print >> self.debug_out, "objroots=", self.objroots
240 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
242 if self.dpdeps and not self.reldir:
244 print >> self.debug_out, "need reldir:",
246 srctop = self.find_top(self.curdir, self.srctops)
248 self.reldir = self.curdir.replace(srctop,'')
250 print >> self.debug_out, self.reldir
252 self.dpdeps = None # we cannot do it?
254 self.cwd = os.getcwd() # make sure this is initialized
260 """reset state if we are being passed meta files from multiple directories."""
266 def dirdeps(self, sep='\n'):
268 return sep.strip() + sep.join(self.obj_deps)
270 def src_dirdeps(self, sep='\n'):
271 """return SRC_DIRDEPS"""
272 return sep.strip() + sep.join(self.src_deps)
274 def file_depends(self, out=None):
275 """Append DPDEPS_${file} += ${RELDIR}
276 for each file we saw, to the output file."""
279 for f in sort_unique(self.file_deps):
280 print >> out, 'DPDEPS_%s += %s' % (f, self.reldir)
282 def seenit(self, dir):
283 """remember that we have seen dir."""
286 def add(self, list, data, clue=''):
287 """add data to list if it isn't already there."""
291 print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data)
293 def find_top(self, path, list):
294 """the logical tree may be split across multiple trees"""
296 if path.startswith(top):
298 print >> self.debug_out, "found in", top
302 def find_obj(self, objroot, dir, path, input):
303 """return path within objroot, taking care of .dirdep files"""
305 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
306 if not ddep and os.path.exists(ddepf):
307 ddep = open(ddepf, 'rb').readline().strip('# \n')
309 print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep)
310 if ddep.endswith(self.machine):
311 ddep = ddep[0:-(1+len(self.machine))]
312 elif self.target_spec and ddep.endswith(self.target_spec):
313 ddep = ddep[0:-(1+len(self.target_spec))]
316 # no .dirdeps, so remember that we've seen the raw input
319 if self.machine == 'none':
320 if dir.startswith(objroot):
321 return dir.replace(objroot,'')
323 m = self.dirdep_re.match(dir.replace(objroot,''))
326 dmachine = m.group(1)
327 if dmachine != self.machine:
328 if not (self.machine == 'host' and
329 dmachine == self.host_target):
331 print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep)
332 ddep += '.' + dmachine
336 def parse(self, name=None, file=None):
337 """A meta file looks like:
339 # Meta data file "path"
344 -- filemon acquired metadata --
354 L "pid" "src" "target"
359 We go to some effort to avoid processing a dependency more than once.
360 Of the above record types only C,E,F,L,R,V and W are of interest.
363 version = 0 # unknown
368 cwd = last_dir = self.cwd
370 f = open(self.name, 'rb')
377 self.seenit(self.curdir) # we ignore this
379 interesting = 'CEFLRV'
381 # ignore anything we don't care about
382 if not line[0] in interesting:
385 print >> self.debug_out, "input:", line,
394 # we cannot ignore 'W' records
395 # as they may be 'rw'
399 self.cwd = cwd = last_dir = w[1]
400 self.seenit(cwd) # ignore this
402 print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd)
408 pid_cwd[last_pid] = cwd
409 pid_last_dir[last_pid] = last_dir
410 cwd = getv(pid_cwd, pid, self.cwd)
411 last_dir = getv(pid_last_dir, pid, self.cwd)
418 pid_last_dir[npid] = cwd
422 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
423 if cwd.endswith('/.'):
427 print >> self.debug_out, "cwd=", cwd
430 if w[2] in self.seen:
432 print >> self.debug_out, "seen:", w[2]
436 path = w[2].strip("'")
439 # we are never interested in .dirdep files as dependencies
440 if path.endswith('.dirdep'):
442 # we don't want to resolve the last component if it is
444 path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
447 dir,base = os.path.split(path)
450 print >> self.debug_out, "seen:", dir
452 # we can have a path in an objdir which is a link
453 # to the src dir, we may need to add dependencies for each
455 dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
456 if rdir == dir or rdir.find('./') > 0:
458 # now put path back together
459 path = '/'.join([dir,base])
461 print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path)
463 if w[0] == 'W' and path.endswith('.dirdep'):
465 if path in [last_dir, cwd, self.cwd, self.curdir]:
467 print >> self.debug_out, "skipping:", path
469 if os.path.isdir(path):
473 print >> self.debug_out, "ldir=", last_dir
477 # finally, we get down to it
478 if dir == self.cwd or dir == self.curdir:
480 srctop = self.find_top(path, self.srctops)
483 self.add(self.file_deps, path.replace(srctop,''), 'file')
484 self.add(self.src_deps, dir.replace(srctop,''), 'src')
487 if rdir and not rdir.startswith(srctop):
488 dir = rdir # for below
494 for dir in [dir,rdir]:
497 objroot = self.find_top(dir, self.objroots)
501 ddep = self.find_obj(objroot, dir, path, w[2])
503 self.add(self.obj_deps, ddep, 'obj')
505 # don't waste time looking again
512 def main(argv, klass=MetaFile, xopts='', xoptf=None):
513 """Simple driver for class MetaFile.
516 script [options] [key=value ...] "meta" ...
518 Options and key=value pairs contribute to the
519 dictionary passed to MetaFile.
522 add "SRCTOP" to the "SRCTOPS" list.
527 add "OBJROOT" to the "OBJROOTS" list.
542 # import Psyco if we can
543 # it can speed things up quite a bit
558 machine = os.environ['MACHINE']
560 conf['MACHINE'] = machine
561 machine_arch = os.environ['MACHINE_ARCH']
563 conf['MACHINE_ARCH'] = machine_arch
564 srctop = os.environ['SB_SRC']
566 conf['SRCTOPS'].append(srctop)
567 objroot = os.environ['SB_OBJROOT']
569 conf['OBJROOTS'].append(objroot)
576 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
579 conf['MACHINE_ARCH'] = a
585 conf['HOST_TARGET'] = a
587 if a not in conf['SRCTOPS']:
588 conf['SRCTOPS'].append(a)
592 if a not in conf['OBJROOTS']:
593 conf['OBJROOTS'].append(a)
601 conf['TARGET_SPEC'] = a
605 conf['debug'] = debug
607 # get any var=val assignments
612 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
628 debug_out = getv(conf, 'debug_out', sys.stderr)
631 print >> debug_out, "config:"
632 print >> debug_out, "psyco=", have_psyco
633 for k,v in conf.items():
634 print >> debug_out, "%s=%s" % (k,v)
642 print m.src_dirdeps('\nsrc:')
644 dpdeps = getv(conf, 'DPDEPS')
646 m.file_depends(open(dpdeps, 'wb'))
650 if __name__ == '__main__':
654 # yes, this goes to stdout
655 print "ERROR: ", sys.exc_info()[1]