#!/usr/bin/env python2.7
"""
build.py

Copyright 2010 Guillaume Lathoud, MIT License
glathoud@yahoo.fr

Usage:

  build.py [-h] [-v] [-c] [-o OUTPUT] [-d D8_CMD] [-w] [-r COPYRIGHTFILE]
           [--d8_cmd_prep_tailopt D8_CMD_PREP_TAILOPT]
           file1.js some/subdir/file2.js file3.js...

  Only the target files have to be given, because implicitly required
  files will be automatically detected (using req_extract.js) and
  integrated.

Defaults:

  OUTPUT:               'build'
  D8_CMD:               'd8 req_extract.js'
  D8_CMD_PREP_TAILOPT:  'd8 prep_tailopt.js'
  COPYRIGHTFILE:        'build.copyright.txt'

  Break on JSCheck warnings (-w to ignore warnings)

Examples:

  build.py from.js jscheck_web.js
  build.py from.js jscheck_web.js tomrec_web.js
  build.py from.js jscheck_web.js tomrec_web.js dom.js

Actions:

  1. Two output dirs OUTPUT_u and OUTPUT_c will be created, if not yet
     existing. They correspond to the uncompressed and compressed
     outputs, respectively.
     - examples: 'build_u' and 'build_c'

  2. For each javascript (JS) file a corresponding JS file will be
     created in OUTPUT_u, packing its `from.req()` resources
     intelligently.
     - Loops are forbidden.
     - The original subdir structure (e.g. 'some/subdir/') is
       reproduced under OUTPUT_u.

  3. Each file in OUTPUT_u is compressed, the compressed output is
     written in OUTPUT_c
"""

import getopt
import hashlib
import os
import pprint
import re
import shutil
import subprocess
import sys
import tempfile

try:
    import StringIO
except ImportError:
    # The `StringIO` module only exists on Python 2; `io` provides a class
    # of the same name, so `StringIO.StringIO(...)` keeps working.
    import io as StringIO


# Preprocessing directive inside a JS file, e.g. /*prep tailopt */
PREP_COMMENT_RX = re.compile(r"\/\*prep\s+(?P<prep_list>[^\*]+?)\s*\*\/")

# Empty `hash_table = {}` placeholder expected in "from.js"
HASH_TABLE_RX = re.compile(r'(hash_table)\s*=\s*\{\s*\}')

# Quoted key of the hash table literal, e.g. 'build_u/file.js':
HASH_KEY_RX = re.compile(r"(?P<quote>[\"'])(?P<name>[^'\"]+)(?P=quote)\s*:")

# NOTE(review): file contents are read in binary mode and then mixed with
# plain string literals, which assumes Python 2 byte strings (see shebang).


def _echo(*items, **options):
    """Write `items` separated by spaces plus a newline, like `print`.

    The optional keyword `stream` selects the output file (default:
    the current `sys.stdout`).
    """
    stream = options.get('stream')
    if stream is None:
        stream = sys.stdout
    stream.write(' '.join(str(item) for item in items) + '\n')


def _read_file(path):
    """Return the whole content of `path`, read in binary mode."""
    with open(path, 'rb') as f:
        return f.read()


def _write_file(path, data):
    """Overwrite `path` with `data`, written in binary mode."""
    with open(path, 'wb') as f:
        f.write(data)


def _quote(s):
    """Wrap `s` in double quotes for use on the shell command line."""
    return '"' + s + '"'


def my_exec(cmd):
    """Run `cmd` through the shell and return `(out, err, code)`.

    `cmd` is either a command string or a sequence of strings that is
    joined with spaces. `out` and `err` are the captured standard output
    and standard error, `code` is the exit code.
    """
    command = cmd if isinstance(cmd, str) else ' '.join(cmd)
    p = subprocess.Popen(command,
                         shell=True,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # communicate() drains both pipes together: reading stdout to the end
    # before touching stderr can block forever once stderr fills up.
    out, err = p.communicate()
    return (out, err, p.returncode)


def grab(filenamelist, d8_cmd):
    """Parse the files of `filenamelist` and return `(req, o, all_rsrc)`.

    `req` maps each parsed filename to its description (see `update_req`),
    `o` is the raw output of `d8_cmd`, and `all_rsrc` is the set of all
    filenames seen so far (parsed files and the resources they require).
    """
    req = {}
    o = parse(filenamelist, d8_cmd)
    all_rsrc = set()
    update_req(req, all_rsrc, o)
    assert set(req.keys()) == set(filenamelist)  # sanity check
    return (req, o, all_rsrc,)


def preprocess(filenamelist, req, o, all_rsrc, d8_cmd_prep_tailopt):
    """Apply the preprocessors requested by `/*prep ... */` comments.

    Every file of `all_rsrc` containing such a comment is copied to a new
    temporary directory and each listed preprocessor is run on the copy, in
    order. Only `tailopt` is implemented (it runs `d8_cmd_prep_tailopt`);
    any other name, or a failing command, terminates the program.
    `filenamelist`, `all_rsrc` and `req` are updated to point to the copy.

    Returns `(filenamelist, req, o, all_rsrc)`; `o` is passed through
    unchanged.
    """
    # List the files that need preprocessing
    need_prep = []
    for fname in all_rsrc:
        mo = PREP_COMMENT_RX.search(_read_file(fname))
        if mo:
            need_prep.append((fname, mo))

    # Apply preprocessing, if needed
    if need_prep:
        outdir = tempfile.mkdtemp('_build_py_preprocess')
        _echo()
        _echo('Preprocessing needed, outdir:', outdir)

        for fname, mo in need_prep:
            # split() without argument already drops blanks and empty items
            prep_list = mo.group('prep_list').split()
            if not prep_list:
                continue

            outfname = os.path.join(outdir, os.path.split(fname)[1])
            shutil.copyfile(fname, outfname)

            for prep in prep_list:
                if prep == 'tailopt':
                    # Quoted like in parse(): the temporary path may
                    # contain blanks.
                    out, err, code = my_exec(
                        d8_cmd_prep_tailopt + ' -- ' + _quote(outfname))
                    if code != 0:
                        _echo(stream=sys.stderr)
                        _echo('Got an error from "' + d8_cmd_prep_tailopt
                              + '": ', err, stream=sys.stderr)
                        sys.exit(code)
                    _write_file(outfname, out)
                else:
                    _echo('In file "' + outfname
                          + '", found an unknown preprocessor "'
                          + prep + '"', stream=sys.stderr)
                    sys.exit(1)

            _echo(fname, 'preprocessed with:', prep_list,
                  ' --> result:', outfname)

            # Update to point to the new, preprocessed file
            if fname in filenamelist:
                filenamelist = list(filenamelist)
                filenamelist.remove(fname)
                filenamelist.append(outfname)

            all_rsrc.remove(fname)
            all_rsrc.add(outfname)

            if fname in req:
                req[outfname] = req.pop(fname)

    # Done
    return (filenamelist, req, o, all_rsrc)


def parse(filenamelist, d8_cmd):
    """Run `d8_cmd` on the given files and return its standard output.

    Terminates the program with the exit code of the command if that
    code is not zero.
    """
    o, e, c = my_exec(
        d8_cmd + ' -- ' + ' '.join(_quote(s) for s in filenamelist))
    if c != 0:
        _echo(stream=sys.stderr)
        _echo('Got an error from "' + d8_cmd + '": ', e, stream=sys.stderr)
        sys.exit(c)
    return o


def update_req(req, all_rsrc, o):
    """Fill `req` and `all_rsrc` from the parser output `o`.

    Each non-empty line of `o` is a comma-separated record:

        filename,error,warning[,start,end,value]*

    where each `start,end,value` triple describes one required resource
    (`value`) and its position in the file. Files already present in `req`
    are left untouched.
    """
    for line in StringIO.StringIO(o):
        line = line.rstrip('\r\n')
        if not line:
            continue  # e.g. trailing blank line: nothing to record

        arr = line.split(',')
        filename = arr[0]
        all_rsrc.add(filename)
        if filename in req:
            continue

        rsrc_where = []
        rest = arr[3:]
        while rest:
            rsrc_where.append({
                'start': int(rest[0]),
                'end': int(rest[1]),
                'value': rest[2],
            })
            all_rsrc.add(rest[2])
            rest = rest[3:]

        req[filename] = {
            'size': os.stat(filename).st_size,
            'error': arr[1],
            'warning': arr[2],
            'rsrc': [w['value'] for w in rsrc_where],
            'rsrc_where': rsrc_where,
        }


def crash_on_error(req):
    """Terminate the program (code 1) if any file has a parse error."""
    for filename in req:
        if req[filename]['error']:
            _echo(stream=sys.stderr)
            _echo('[ERROR] Parse error on file "{0}"'.format(filename),
                  stream=sys.stderr)
            sys.exit(1)


def crash_on_warning(req):
    """Terminate the program (code 1) if any file has a parse warning."""
    for filename in req:
        if req[filename]['warning']:
            _echo(stream=sys.stderr)
            _echo('[ERROR] Parse warning on file "{0}"'.format(filename),
                  stream=sys.stderr)
            sys.exit(1)


def crash_on_loop(req, filename, acc=None):
    """Terminate the program (code 1) on a dependency loop.

    Walks the `rsrc` of `filename` depth-first; `acc` is the set of files
    on the current path (callers normally omit it).
    """
    acc = acc or set()
    if filename in acc:
        _echo(stream=sys.stderr)
        _echo('[ERROR] Dependency loop detected on file "{filename}" '
              'involving {acc}'.format(filename=filename, acc=acc),
              stream=sys.stderr)
        sys.exit(1)

    # Just in case: note that we follow the order of the array
    for other in req[filename]['rsrc']:
        crash_on_loop(req, other, acc.union([filename]))


def expand(req, target_arr):
    """Compute `req[target]['rsrc_expanded']` for each target.

    `rsrc_expanded` is the ordered list of files to pack into the target:
    its resources, recursively, each one after its own resources, and
    without the other targets. `target_arr` is modified in place:
    'from.js', if present, is moved to the front.

    NOTE(review): resources are recorded by base name only
    (`os.path.split(p)[1]`), which looks like it assumes that all files
    sit in one directory - confirm.
    """
    # special case: from.js (base_pieces)
    F = 'from.js'
    if F in target_arr:
        while F in target_arr:
            target_arr.remove(F)
        target_arr[0:0] = [F]

    excluded = list(target_arr)

    S = 'rsrc_expanded'
    for filename in target_arr:
        o = req[filename]
        # Copy: the list is extended below and must not alias o['rsrc']
        o[S] = list(o['rsrc'])

        # Expand until nothing changes anymore
        while True:
            tmp = []
            for n in o[S]:
                if (n in tmp) or (n in excluded):
                    continue
                for p in req[n]['rsrc'] + [n]:
                    if (p != n) and (p in excluded):
                        continue
                    if p not in tmp:
                        tmp.append(os.path.split(p)[1])
            if o[S] == tmp:
                break
            o[S] = tmp

        if filename == F:
            # special case: from.js comes *just before* its `base_pieces`
            o[S] = [filename] + o[S]
            # ...and its pieces are not packed again in the other targets
            excluded.extend(o[S])
        else:
            o[S].append(filename)

        _echo('expanded:', filename, '->', o[S])


def cleanupdir(d):
    """Make sure that `d` is an existing, empty directory."""
    if os.path.exists(d):
        shutil.rmtree(d)
    if not os.path.exists(d):
        os.makedirs(d)


def update_paths(req, filenamelist, transform=None, filename=None):
    """Rewrite the resource names inside the output file(s).

    Without `filename`, all files of `filenamelist` are processed. For
    one file, each reference of `req[filename]['rsrc_where_to']` is
    replaced in `req[filename]['to']`:

    - by `transform[value]` if `transform` (a dict) has that value,
    - else by `req[value]['to']` if the value is one of the targets,
    - else it is left as is.

    The recorded positions are updated accordingly and the file is
    rewritten only if something changed.
    """
    if filename is None:
        for one in filenamelist:
            update_paths(req, filenamelist, transform, one)
        return

    outputfilename = req[filename]['to']
    where_list = req[filename]['rsrc_where_to']

    data = _read_file(outputfilename)
    new_data = data

    # We start with the last one
    for o in sorted(where_list, key=lambda w: w['start'], reverse=True):
        old_v = o['value']
        if transform and (old_v in transform):
            new_v = transform[old_v]  # Custom transformation (dict)
        elif old_v in filenamelist:
            new_v = req[old_v]['to']  # Default transformation
        else:
            continue

        _echo('############## old, new:', old_v, new_v)
        assert new_data[o['start']:o['end']] == old_v

        new_data = new_data[:o['start']] + new_v + new_data[o['end']:]
        o['value'] = new_v
        o['end'] = o['start'] + len(new_v)

        # All following values were shifted
        delta = len(new_v) - len(old_v)
        if delta != 0:
            for o2 in where_list:
                if o2['start'] <= o['start']:
                    continue
                o2['start'] += delta
                o2['end'] += delta

    if data != new_data:
        _write_file(outputfilename, new_data)


def sanity_check(req, filenamelist):
    """Assert that each recorded reference matches the output file.

    Also prints the description of each file and the compared values.
    """
    for filename in filenamelist:
        pprint.pprint(req[filename])
        data = _read_file(req[filename]['to'])
        for o in req[filename]['rsrc_where_to']:
            found = data[o['start']:o['end']]
            _echo(o['value'])
            _echo(found)
            assert o['value'] == found


def _collect_requirements(filenamelist, d8_cmd, check_warnings):
    """Parse the targets and, recursively, everything they require.

    Terminates the program on a missing resource, a parse error, a parse
    warning (only if `check_warnings`) or a dependency loop. Returns
    `(req, o, all_rsrc)` as `grab` does.
    """
    # for each filename, grab its required resources
    req, o, all_rsrc = grab(filenamelist, d8_cmd)

    # recursively parse all required files
    while True:
        to_parse = set(all_rsrc).difference(req)
        if not to_parse:
            break
        update_req(req, all_rsrc, parse(sorted(to_parse), d8_cmd))

    # make sure all required resources are available
    missing_rsrc = [s for s in all_rsrc if not os.path.exists(s)]
    if missing_rsrc:
        _echo(stream=sys.stderr)
        _echo('[ERROR] Not all resources found! missing:', missing_rsrc,
              stream=sys.stderr)
        sys.exit(1)

    # checks
    crash_on_error(req)
    if check_warnings:
        crash_on_warning(req)
    for filename in req:
        crash_on_loop(req, filename)

    return (req, o, all_rsrc)


def _content_hash(data):
    """Return '<md5 hex digest>.<length>' of `data` (browser cachebust)."""
    return '.'.join([hashlib.md5(data).hexdigest(), str(len(data))])


def _write_expanded(req, filenamelist, output_u):
    """Write each target, packed with its resources, into `output_u`.

    Sets `req[target]['to']` (output path) and
    `req[target]['rsrc_where_to']` (positions of the resource names in
    the packed file, with the names reduced to their base name).
    """
    for filename in filenamelist:
        outputfilename = os.path.join(output_u, os.path.split(filename)[1])
        if os.path.exists(outputfilename):
            raise IOError("Output file already exists !!" + outputfilename)
        req[filename]['to'] = outputfilename

    separator = os.linesep + '// ' + ('-' * 70) + os.linesep

    for filename in filenamelist:
        outputfilename = req[filename]['to']
        rsrc_where_to = []
        offset = 0

        _echo('=' * 30, filename)
        with open(outputfilename, 'wb') as out:
            for f in req[filename]['rsrc_expanded']:
                data = _read_file(f)
                out.write(data)
                out.write(separator)

                # Mark where the package names are in the source, because
                # we'll change them a bit (e.g. insert 'build_u/' path and
                # hash).
                for where in req[f]['rsrc_where']:
                    shifted = dict(where)
                    shifted['start'] += offset
                    shifted['end'] += offset
                    shifted['value'] = os.path.split(shifted['value'])[1]
                    rsrc_where_to.append(shifted)

                # For the next included files
                offset += len(data) + len(separator)

        req[filename]['rsrc_where_to'] = rsrc_where_to
        _echo('Wrote:', outputfilename)


def _cachebust_from_js(req, hash_table):
    """Insert `hash_table` into the built "from.js" and rename that file.

    The empty `hash_table = {}` placeholder is replaced by the table
    (package cachebust) and the result is written to a new file whose
    name contains its own hash (entry point cachebust). Terminates the
    program if the placeholder cannot be found.
    """
    s = 'from.js'
    old = req[s]['to']
    old_data = _read_file(old)

    mo = HASH_TABLE_RX.search(old_data)
    if mo is None:
        _echo('', stream=sys.stderr)
        _echo('[ERROR] Could not find the `hash_table` definition '
              'in "from.js"', stream=sys.stderr)
        sys.exit(1)

    new_str = mo.group(1) + '=' + str(hash_table)
    new_data = old_data[:mo.start()] + new_str + old_data[mo.end():]
    offset = len(new_data) - len(old_data)

    # Append also a .js to ensure MIME type (on most servers)
    req[s]['to'] += '.' + _content_hash(new_data) + '.js'

    # update existing reference (most likely `base_pieces`)
    refs = req[s]['rsrc_where_to']
    for o in refs:
        if o['start'] < mo.end():
            continue
        o['start'] += offset
        o['end'] += offset

    # add the new references present in the `hash_table`
    for mo2 in HASH_KEY_RX.finditer(new_str):
        refs.append({
            'value': mo2.group('name'),
            'start': mo.start() + mo2.start('name'),
            'end': mo.start() + mo2.end('name'),
        })

    # Debug output: last reference and the text found at its position
    pprint.pprint('----------')
    if refs:
        last = refs[-1]
        pprint.pprint(last)
        pprint.pprint(new_data[last['start']:last['end']])

    # change the filename of "from.js" (entry point cachebust)
    _write_file(req[s]['to'], new_data)
    os.remove(old)


def _compress_outputs(req, filenamelist, copyrightfile):
    """Replace each built file by its compressed code, in place.

    The content of `copyrightfile`, if that file exists, is written in
    front of the compressed code. Raises IOError if a built file is
    missing or if the compression command fails.
    """
    copyrighttext = ''
    if os.path.exists(copyrightfile):
        copyrighttext = _read_file(copyrightfile)

    for filename in filenamelist:
        new = req[filename]['to']
        if not os.path.exists(new):
            raise IOError("Built file does not exist !! " + new)

        cmd = ("d8 -e \"load('from.js');from.req('jscheck.js','narcissus.code.compress.js')(function () {"
               + "print(narcissus.code.compress(read('" + new + "')).compressed_code);"
               + "});\"")
        o, e, c = my_exec(cmd)
        if c != 0:
            raise IOError("Failed to parse and regenerate file " + new)

        with open(new, 'wb') as f:
            f.write(copyrighttext + '\n')
            f.write(o)

        _echo('build.py: compressed "' + new + '"')

    if not copyrighttext:
        _echo('(no copyrighttext)')
    else:
        _echo('Copyright text:')
        _echo(copyrighttext)


def main(filenamelist, output, d8_cmd="d8 req_extract.js",
         d8_cmd_prep_tailopt="d8 prep_tailopt.js", compress=False,
         verbose=False, warning_ignore=False,
         copyrightfile="build.copyright.txt"):
    """Build the target files `filenamelist`.

    Writes the packed files into the directory `output + '_u'` and their
    compressed versions into `output + '_c'`; both directories are emptied
    first.

    - `d8_cmd`: command listing the resources required by JS files.
    - `d8_cmd_prep_tailopt`: command of the `tailopt` preprocessor.
    - `warning_ignore`: do not stop on parse warnings.
    - `copyrightfile`: text file put in front of each compressed file,
      if it exists.
    - `compress`, `verbose`: accepted but currently not used.

    Terminates the program (`sys.exit`) on parse errors, parse warnings,
    dependency loops and missing resources.
    """
    output_u = output + '_u'
    output_c = output + '_c'

    req, o, all_rsrc = _collect_requirements(
        filenamelist, d8_cmd, not warning_ignore)

    # apply preprocessors, if any.
    # - declared with a Javascript comment /*prep -> */ /*prep tailopt */ /*prep -> tailopt*/
    # - applied in the order (e.g. -> first, then tailopt, in /*prep -> tailopt*/).
    #
    # Note: this will probably copy some of the files to a temporary directory...
    filenamelist, req, o, all_rsrc = preprocess(
        filenamelist, req, o, all_rsrc, d8_cmd_prep_tailopt)

    # ...so we have to do it again (warnings were already checked above)
    req, o, all_rsrc = _collect_requirements(filenamelist, d8_cmd, False)

    # for each target, expand all resources that are not themselves targets
    expand(req, filenamelist)

    # produce the expanded files
    _echo('.')
    cleanupdir(output_u)
    _write_expanded(req, filenamelist, output_u)

    # Update the paths to the target files
    update_paths(req, filenamelist)

    # Hashes for browser cachebust, similar to that of:
    # http://glat.info/pub/dojo_cachebust_hash/dojo_cachebust_hash.xhtml
    hash_table = {}
    for filename in filenamelist:
        if filename == 'from.js':
            continue
        current = req[filename]['to']
        hash_table[current] = _content_hash(_read_file(current))

    pprint.pprint(hash_table)

    if 'from.js' in filenamelist:
        _cachebust_from_js(req, hash_table)

    # --- Sanity check
    sanity_check(req, filenamelist)

    # --- Produce the compressed files
    _echo('.')
    cleanupdir(output_c)

    transform = {}
    for filename in filenamelist:
        # copy the file
        old = req[filename]['to']
        new = os.path.join(*[output_c if (s == output_u) else s
                             for s in os.path.split(old)])
        if os.path.exists(new):
            raise IOError("Output file already exists !!" + new)
        shutil.copyfile(old, new)
        req[filename]['to'] = new
        transform[old] = new

    # update the contents accordingly
    update_paths(req, filenamelist, transform)
    sanity_check(req, filenamelist)

    # code compression
    _compress_outputs(req, filenamelist, copyrightfile)


def _run_from_command_line(argv):
    """Parse the command line arguments `argv` and call `main`."""
    try:
        # `-r` and `--copyrightfile` take a value, hence "r:" and "...="
        opts, args = getopt.gnu_getopt(
            argv, "ho:d:cvwr:",
            ["help", "output=", "d8_cmd=", "d8_cmd_prep_tailopt=",
             "compress", "verbose", "warning_ignore", "copyrightfile="])
    except getopt.GetoptError as err:
        # print help information and exit:
        _echo(str(err))  # will print something like "option -a not recognized"
        _echo(__doc__)
        sys.exit(2)

    d8_cmd = "d8 req_extract.js"
    d8_cmd_prep_tailopt = "d8 prep_tailopt.js"
    verbose = False
    compress = False
    output = None
    warning_ignore = False
    copyrightfile = "build.copyright.txt"

    for o, a in opts:
        if o in ("-v", '--verbose'):
            verbose = True
        elif o in ("-h", "--help"):
            _echo(__doc__)
            sys.exit()
        elif o in ('-c', '--compress'):
            compress = True
        elif o in ('-d', '--d8_cmd'):
            d8_cmd = a
        elif o == '--d8_cmd_prep_tailopt':
            # long option only: getopt short options are single letters
            d8_cmd_prep_tailopt = a
        elif o in ('-o', '--output'):
            output = a
        elif o in ('-w', '--warning_ignore'):
            warning_ignore = True
        elif o in ('-r', '--copyrightfile'):
            copyrightfile = a
        else:
            raise AssertionError("unhandled option")

    if output is None:
        output = "build"

    if len(args) < 1:
        _echo(__doc__)
        sys.exit()

    main(args, output, d8_cmd, d8_cmd_prep_tailopt, compress, verbose,
         warning_ignore, copyrightfile)


if __name__ == '__main__':
    _run_from_command_line(sys.argv[1:])