Contrary to the perlpod manpage, Interscript pod commands are terminated at the end of a line, not the end of a 'paragraph'. It shouldn't make the slightest bit of difference, however, since weavers ignore blank lines anyhow.
When the tangler is in paragraph mode, blank lines are translated to end-of-paragraph commands. Paragraph mode is triggered by any non-command, non-blank data line, so you won't get an end of paragraph after a command before the first text.
Currently, support for the for/begin/end pod constructions is a hack. Interscript uses a different (better!) mechanism.
#line 22 "perl_tangler.ipk"
from interscript.tanglers.base import tangler_base
from interscript.tanglers.c import c_string_tangler
import re
import string


class perl_tangler(tangler_base):
    """Tangle Perl source lines and weave the POD embedded in them.

    Every line handed to writeline() is written to the sink.  Lines seen
    in 'code' mode are echoed to the weaver as code; a line matching a
    pod command ('=name ...') switches to 'pod' mode, in which commands
    and text are translated into weaver calls until '=cut'.
    """

    def __init__(self, sink, weaver, heading_level_offset=2):
        """Set up the tangler.

        sink and weaver are passed on to tangler_base.__init__.
        heading_level_offset is added to the level of '=head1' and
        '=head2' before weaver.head() is called.
        """
        tangler_base.__init__(self, sink, weaver)
        self.language = 'perl'

        self.mode = 'code'
        self.list_type = []
        # These two are read by flow_escape() and writeline(); give them
        # a value here so that __del__ (and a blank line following a
        # literal paragraph) cannot hit an unset attribute.
        self.flow_text = ''
        self.pod_par = 0
        self.pod_re = re.compile('^=([A-Za-z][A-Za-z0-9_]*) *(.*)$')
        self.heading_level_offset = heading_level_offset
        self.esc_re = re.compile('^(.*?)(>|[IBSCLFXZE]<)(.*)$')
        self.digits_re = re.compile('^([0-9]+)>(.*)$')
        self.entity_re = re.compile('^([A-Za-z]+)>(.*)$')
        # this is not a full list, we should in fact call a weaver routine.
        self.html_entity = {
            'amp': '&',
            'lt': '<',
            'gt': '>',
            'quot': '"',
            'copy': 'C',
            'trade': 'T',
            'reg': 'R'}

    def __del__(self):
        """Weave any pending flowing text and close any open lists."""
        self.flow_escape()
        self.end_lists()

    def _verbosity_level(self):
        """Return the diagnostic verbosity level, 0 when none is set.

        The diagnostics used to test a bare global 'verbosity' that this
        module never defines, so they raised NameError instead of
        printing.
        NOTE(review): assumes the level, if any, lives on the instance as
        'verbosity' -- confirm against tangler_base.
        """
        return getattr(self, 'verbosity', 0)

    def flow_escape(self):
        """Weave the accumulated flowing text, then clear it."""
        line = self.flow_text
        if not line:
            return
        self.flow_text = ''
        # process balanced text,
        # if there is an unbalanced >, the text after it is returned
        # write a >, and then try again.
        tail = self.flow_parse(line)
        while tail:
            if self._verbosity_level() >= 4:
                print('Unbalanced > in perl POD text')
            self.weaver.write('>')
            tail = self.flow_parse(tail)

    # recursive descent parser
    def flow_parse(self, tail):
        """Weave tail, translating POD escapes such as I<...> and E<...>.

        Returns the text following an unmatched '>' (which may be the
        terminator of an enclosing escape), or '' once everything has
        been woven.
        """
        if not tail:
            return ''
        weaver = self.weaver

        match = self.esc_re.match(tail)
        while match:
            pre, cmd, tail = match.group(1, 2, 3)
            if pre:
                weaver.write(pre)
            if cmd == '>':
                return tail

            assert len(cmd) == 2 and cmd[1] == '<'
            cmd = cmd[0]
            if cmd == 'I':
                weaver.begin_italic()
                tail = self.flow_parse(tail)
                weaver.end_italic()
            elif cmd == 'B':
                weaver.begin_bold()
                tail = self.flow_parse(tail)
                weaver.end_bold()
            elif cmd in ('C', 'F', 'X'):
                # C is code, F is a filename, X is an index entry;
                # all three are simply printed as code.
                weaver.begin_code()
                tail = self.flow_parse(tail)
                weaver.end_code()
            elif cmd == 'L':
                # a link: we just hack it for now
                weaver.write('[')
                tail = self.flow_parse(tail)
                weaver.write(']')
            elif cmd in ('S', 'Z'):
                # S should be non-breaking spaces, but interscript
                # doesn't implement that; Z is a zero width character.
                # Both just weave their content unchanged.
                tail = self.flow_parse(tail)
            elif cmd == 'E':
                numeric = self.digits_re.match(tail)
                if numeric:
                    digits, tail = numeric.group(1, 2)
                    try:
                        char = chr(int(digits))
                    except (ValueError, OverflowError):
                        # chr() rejects codes outside its range; print
                        # the escape literally, as is done for unknown
                        # entity names below.
                        char = 'E<' + digits + '>'
                    weaver.write(char)
                else:
                    named = self.entity_re.match(tail)
                    if named:
                        entity, tail = named.group(1, 2)
                        data = self.html_entity.get(
                            entity, 'E<' + entity + '>')
                        weaver.write(data)
                    else:
                        # nothing we recognize, print literally
                        weaver.write('E<')
                        tail = self.flow_parse(tail)
                        weaver.write('>')

            match = self.esc_re.match(tail)

        # no (more) matches, so just weave the tail
        self.weaver.writeline(tail)
        return ''

    def end_list_item(self):
        """Close the current item of the innermost open list, if any."""
        if not self.list_type:
            return
        kind = self.list_type[-1]
        weaver = self.weaver
        if kind == 'keyed':
            weaver.end_keyed_list_item()
        elif kind == 'bullet':
            weaver.end_bullet_list_item()
        elif kind == 'numbered':
            weaver.end_numbered_list_item()

    def end_list(self):
        """Close the innermost open list, if any, and pop it."""
        if not self.list_type:
            return
        kind = self.list_type[-1]
        weaver = self.weaver
        if kind == 'keyed':
            weaver.end_keyed_list()
        elif kind == 'bullet':
            weaver.end_bullet_list()
        elif kind == 'numbered':
            weaver.end_numbered_list()
        del self.list_type[-1]

    def end_lists(self):
        """Close every open list, innermost first.

        A list is only ever pushed by the '=item' handler, which opens
        an item straight after it, so every list on the stack has an
        open item; close that item first, exactly as '=back' does.
        """
        while self.list_type:
            self.end_list_item()
            self.end_list()

    def begin_list(self, kind):
        """Push a list of the given kind and open it in the weaver."""
        # print '** list type:',kind
        self.list_type.append(kind)
        weaver = self.weaver
        if kind == 'keyed':
            weaver.begin_keyed_list()
        elif kind == 'bullet':
            weaver.begin_bullet_list()
        elif kind == 'numbered':
            weaver.begin_numbered_list()

    def begin_list_item(self, key=None):
        """Open an item in the innermost list; key is used by keyed lists."""
        kind = self.list_type[-1]
        weaver = self.weaver
        if kind == 'keyed':
            weaver.begin_keyed_list_item(key)
        elif kind == 'bullet':
            weaver.begin_bullet_list_item()
        elif kind == 'numbered':
            weaver.begin_numbered_list_item()

    def _strip_escapes(self, key):
        """Return key with the escape markers ('X<' and '>') removed.

        The text between the markers is kept.
        """
        stripped = ''
        tail = key
        match = self.esc_re.match(tail)
        while match:
            pre, _marker, tail = match.group(1, 2, 3)
            stripped = stripped + pre
            match = self.esc_re.match(tail)
        if tail:
            stripped = stripped + tail
        return stripped

    def writeline(self, data, file, count, inhibit_sref=0):
        """Tangle one source line and weave it as code or as POD.

        data is the line; file and count identify where it came from and
        are compared with the sink's last recorded source position to
        decide whether a '#line' section header is needed.  A true
        inhibit_sref (or self.inhibit_sref) suppresses that header.
        """
        if not inhibit_sref and not self.inhibit_sref:
            if (file != self.sink.last_source_file or
                    count != self.sink.last_source_count + 1):
                self.start_section(file, count)
        self.sink.last_source_file = file
        self.sink.last_source_count = count
        tangler_base._writeline(self, data)

        # try to find a pod command
        pod = self.pod_re.match(data)

        # if we're in code mode, and we didn't
        # get a pod command, just echotangle as code
        # otherwise, switch to pod mode

        if self.mode == 'code':
            if pod:
                self.mode = 'pod'
            else:
                self.weaver.echotangle(self.sink.lines_written, data)
                return

        # now we're in pod mode, if we didn't get a pod command,
        # strip the line to see if it's blank.
        # if not, weave it and switch pod end of para detection on
        # otherwise, emit an end of paragraph if detection is on
        # pod_par means: 0 - begin of para, 1 - flowing text, 2 - literal text
        # NOTE(review): a paragraph following '=for' ('litpar' mode) is
        # not special-cased here; raw mode stays on until '=end' or '=cut'.
        assert self.mode == 'pod'
        if not pod:
            line = data.rstrip()
            if line:
                if not self.pod_par:
                    # leading whitespace marks a literal paragraph
                    self.pod_par = (line[0] in ' \t') + 1
                    if self.pod_par == 1:
                        self.flow_text = ''
                if self.pod_par - 1:
                    self.weaver.writecode(line)
                else:
                    # escapes are searched for later, in flow_escape()
                    self.flow_text = self.flow_text + line + ' '
            elif self.pod_par:
                self.flow_escape()
                self.weaver.par()
                self.pod_par = 0  # beginning of paragraph
            return

        # we've got a pod command, so turn para detection off
        assert pod
        # Flowing text directly followed by a command (no blank line in
        # between) is still pending; weave it now, otherwise the next
        # paragraph would discard it.
        self.flow_escape()
        self.pod_par = 0
        cmd = pod.group(1)

        # if we're cutting back to code, terminate lists and list
        # items correctly if necessary and switch back to code mode

        if cmd == 'cut':
            self.end_lists()
            if hasattr(self, 'pod_mode'):
                if self.pod_mode in ['lit', 'litpar']:
                    self.weaver.enable()  # disable rawmode
                    self.weaver.translate()  # disable rawmode
                del self.pod_mode
            self.mode = 'code'
            return

        # Otherwise, just process the command

        if cmd in ('head1', 'head2'):
            self.end_lists()
            level = int(cmd[4])
            self.weaver.head(level + self.heading_level_offset, pod.group(2))

        elif cmd == 'over':
            # list of unknown type pending, wait for =item
            self.pod_mode = 'list'

        elif cmd == 'back':
            # a stray =back with no open list is ignored
            if self.list_type:
                self.end_list_item()
                self.end_list()

        elif cmd == 'item':
            if not hasattr(self, 'pod_mode'):
                if self._verbosity_level() >= 2:
                    print('POD: item before over')
                self.pod_mode = 'list'
            key = pod.group(2).strip()
            if self.pod_mode == 'item' and self.list_type:
                self.end_list_item()
            else:
                # first item after =over, or an item arriving when no
                # list is open any more: start a new list
                self.pod_mode = 'item'
                list_type = 'keyed'
                if len(key) == 1 and key in '*+.-':
                    list_type = 'bullet'
                self.begin_list(list_type)
            if self.list_type[-1] == 'keyed':
                # interscript doesn't support formatting of any kind
                # in keyed list keys (because LaTeX doesn't)
                # we need another kind of list (LaTeX can be given one)
                # For now, we remove any X<...> stuff
                key = self._strip_escapes(key)

            self.begin_list_item(key)

        elif cmd == 'for':
            self.weaver.rawif(pod.group(2))
            self.pod_mode = 'litpar'
        elif cmd == 'begin':
            self.weaver.rawif(pod.group(2))
            self.pod_mode = 'lit'
        elif cmd == 'end':
            self.weaver.enable()
            self.weaver.translate()
            # Reset our own mode (this used to be set on the weaver), so
            # that a later =cut does not re-enable the weaver again.
            self.pod_mode = ''

    def write_comment(self, line):
        """Tangle line as a Perl '#' comment."""
        self._writeline('# ' + line)

    def start_section(self, file, count):
        """Tangle a '#line' directive for file/count and echo it to the weaver."""
        data = '#line ' + str(count) + ' ' + '"' + file + '"'
        self._writeline(data)
        self.weaver.echotangle(self.sink.lines_written, data)

    def get_comment_tangler(self):
        """Return a tangler that writes '# ' comments to this sink."""
        # NOTE(review): hash_comment_tangler is not imported in this
        # module; confirm where it is defined and import it.
        return hash_comment_tangler(self.sink, self.weaver, '# ')

    def get_string_tangler(self, eol, width):
        """Return a string tangler for this sink (currently the C one)."""
        # This is _wrong_ and needs to be fixed!
        return c_string_tangler(self.sink, self.get_weaver(), eol, width)