6.6.8.3. Perl Tangler

6.6.8.3. Perl Tangler

This tangler attempts to support Perl's POD constructions. It implements the commands pod, cut, head1, head2, over, back, item, for, begin, and end.

Contrary to the perlpod manpage, Interscript pod commands are terminated at the end of a line, not the end of a 'paragraph'. It shouldn't make the slightest bit of difference, however, since weavers ignore blank lines anyhow.

When the tangler is in paragraph mode, blank lines are translated to end of paragraph commands. Paragraph mode is triggered by any non-command, non-blank data line, so no end of paragraph is emitted between a command and the first text that follows it.

Currently, support for the for/begin/end pod constructions is a hack. Interscript uses a different (better!) mechanism.

Start python section to interscript/tanglers/perl.py[1]
     1: #line 22 "perl_tangler.ipk"
     2: from interscript.tanglers.base import tangler_base
     3: from interscript.tanglers.c import c_string_tangler
     4: import re
     5: import string
     6: 
     7: class perl_tangler(tangler_base):
     8:   def __init__(self,sink,weaver, heading_level_offset = 2):
     9:     tangler_base.__init__(self,sink,weaver)
    10:     self.language = 'perl'
    11: 
    12:     self.mode = 'code'
    13:     self.list_type = []
    14:     self.pod_re = re.compile('^=([A-Za-z][A-Za-z0-9_]*) *(.*)$')
    15:     self.heading_level_offset = heading_level_offset
    16:     self.esc_re = re.compile('^(.*?)(>|[IBSCLFXZE]<)(.*)$')
    17:     self.digits_re = re.compile('^([0-9]+)>(.*)$')
    18:     self.entity_re = re.compile('^([A-Za-z]+)>(.*)$')
    19:     # this is not a full list, we should in fact call a weaver routine.
    20:     self.html_entity = {
    21:       'amp':'&',
    22:       'lt':'<',
    23:       'gt':'>',
    24:       'quot':'"',
    25:       'copy':'C',
    26:       'trade':'T',
    27:       'reg':'R'}
    28: 
    29:   def __del__(self):
    30:     self.flow_escape()
    31:     self.end_lists()
    32: 
    33:   def flow_escape(self):
    34:     line = self.flow_text
    35:     if not line: return
    36:     self.flow_text = ''
    37:     # process balanced text,
    38:     # if there is an unbalanced >, the text after it is returned
    39:     # write a >, and then try again.
    40:     tail = self.flow_parse(line)
    41:     while tail:
    42:       if verbosity >=4: print 'Unbalanced > in perl POD text'
    43:       self.weaver.write('>')
    44:       tail = self.flow_parse(tail)
    45: 
    46:   # recursive descent parser
    47:   def flow_parse(self,tail):
    48:     if not tail: return ''
    49:     weaver = self.weaver
    50: 
    51:     match = self.esc_re.match(tail)
    52:     while match:
    53:       pre, cmd, tail = match.group(1,2,3)
    54:       if pre: weaver.write(pre)
    55:       if cmd=='>': return tail
    56: 
    57:       assert len(cmd)==2 and cmd[1]=='<'
    58:       cmd = cmd[0]
    59:       if cmd == 'I':
    60:         weaver.begin_italic()
    61:         tail = self.flow_parse(tail)
    62:         weaver.end_italic()
    63:       elif cmd == 'B':
    64:         weaver.begin_bold()
    65:         tail = self.flow_parse(tail)
    66:         weaver.end_bold()
    67:       elif cmd == 'S':
    68:         # should be non-breaking spaces, but interscript
    69:         # doesn't implement that
    70:         tail = self.flow_parse(tail)
    71:       elif cmd == 'C':
    72:         weaver.begin_code()
    73:         tail = self.flow_parse(tail)
    74:         weaver.end_code()
    75:       elif cmd == 'L':
    76:         # a link: we just hack it for now
    77:         weaver.write('[')
    78:         tail = self.flow_parse(tail)
    79:         weaver.write(']')
    80:       elif cmd == 'F':
    81:         # filename
    82:         weaver.begin_code()
    83:         tail = self.flow_parse(tail)
    84:         weaver.end_code()
    85:       elif cmd == 'X':
    86:         # index entry??  (Does this mean print it, or index it?)
    87:         # I'll just print it as code :-)
    88:         weaver.begin_code()
    89:         tail = self.flow_parse(tail)
    90:         weaver.end_code()
    91:       elif cmd == 'Z':
    92:         # zero width character? What's that mean?
    93:         tail = self.flow_parse(tail)
    94:       elif cmd == 'E':
    95:         match = self.digits_re.match(tail)
    96:         if match:
    97:           digits, tail = match.group(1,2)
    98:           n = chr(int(digits))
    99:           weaver.write(n)
   100:         else:
   101:           match = self.entity_re.match(tail)
   102:           if match:
   103:             entity, tail = match.group(1,2)
   104:             data = self.html_entity.get(entity,'E<'+entity+'>')
   105:             weaver.write(data)
   106:           else:
   107:             # nothing we recognize, print literally
   108:             weaver.write('E<')
   109:             tail = self.flow_parse(tail)
   110:             weaver.write('>')
   111: 
   112:       match = self.esc_re.match(tail)
   113: 
   114:     # no (more) matches, so just weave the tail
   115:     self.weaver.writeline(tail)
   116:     return ''
   117: 
   118: 
   119:   def end_list_item(self):
   120:     kind = self.list_type[-1]
   121:     weaver = self.weaver
   122:     if kind == 'keyed': weaver.end_keyed_list_item()
   123:     elif kind == 'bullet': weaver.end_bullet_list_item()
   124:     elif kind == 'numbered': weaver.end_numbered_list_item()
   125: 
   126:   def end_list(self):
   127:     kind = self.list_type[-1]
   128:     weaver = self.weaver
   129:     if kind == 'keyed': weaver.end_keyed_list()
   130:     elif kind == 'bullet': weaver.end_bullet_list()
   131:     elif kind == 'numbered': weaver.end_numbered_list()
   132:     del self.list_type[-1]
   133: 
   134:   def end_lists(self):
   135:     while self.list_type: self.end_list()
   136: 
   137:   def begin_list(self,kind):
   138:     # print '** list type:',kind
   139:     self.list_type.append(kind)
   140:     weaver = self.weaver
   141:     if kind == 'keyed': weaver.begin_keyed_list()
   142:     elif kind == 'bullet': weaver.begin_bullet_list()
   143:     elif kind == 'numbered': weaver.begin_numbered_list()
   144: 
   145:   def begin_list_item(self,key=None):
   146:     kind = self.list_type[-1]
   147:     weaver = self.weaver
   148:     if kind == 'keyed': weaver.begin_keyed_list_item(key)
   149:     elif kind == 'bullet': weaver.begin_bullet_list_item()
   150:     elif kind == 'numbered': weaver.begin_numbered_list_item()
   151: 
   152:   def writeline(self,data,file,count,inhibit_sref=0):
   153:     if not inhibit_sref and not self.inhibit_sref:
   154:       if (file != self.sink.last_source_file or
   155:         count != self.sink.last_source_count+1):
   156:         self.start_section(file,count)
   157:     self.sink.last_source_file = file
   158:     self.sink.last_source_count = count
   159:     tangler_base._writeline(self,data)
   160: 
   161:     # try to find a pod command
   162:     pod = self.pod_re.match(data)
   163: 
   164:     # if we're in code mode, and we didn't
   165:     # get a pod command, just echotangle as code
   166:     # otherwise, switch to pod mode
   167: 
   168:     if self.mode == 'code':
   169:       if pod: self.mode = 'pod'
   170:       else:
   171:         self.weaver.echotangle(self.sink.lines_written,data)
   172:         return
   173: 
   174:     # now we're in pod mode, if we didn't get a pod command,
   175:     # strip the line to see if it's blank.
   176:     # if not, weave it and switching pod end of para detection on
   177:     # otherwise, emit an end of paragraph if detection is on
   178:     # unless we're in litpar mode, in which case we have to
   179:     # emulate an 'end' cmd
   180:     # pod_par means: 0 - begin of para, 1 - flowing text, 2 - literal text
   181:     assert self.mode == 'pod'
   182:     if not pod:
   183:       line = string.rstrip(data)
   184:       if line:
   185:         if not self.pod_par:
   186:           self.pod_par = (line[0] in ' \t')+1
   187:           if self.pod_par == 1: self.flow_text = ''
   188:         if self.pod_par-1:
   189:           self.weaver.writecode(line)
   190:         else:
   191:           # we have to search for escapes here!
   192:           self.flow_text = self.flow_text + line + ' '
   193:       elif self.pod_par:
   194:         self.flow_escape()
   195:         self.weaver.par()
   196:         self.pod_par = 0 # beginning of paragraph
   197:       return
   198: 
   199:     # we've got a pod command, so turn para detection off
   200:     assert pod
   201:     self.pod_par = 0
   202:     cmd = pod.group(1)
   203: 
   204:     # if we're cuttiung back to code, terminate lists and list
   205:     # items correctly if nececcary and switch back to code mode
   206: 
   207:     if cmd == 'cut':
   208:       self.end_lists()
   209:       if hasattr(self,'pod_mode'):
   210:         if self.pod_mode in ['lit','litpar']:
   211:           self.weaver.enable() # disable rawmode
   212:           self.weaver.translate() # disable rawmode
   213:         del self.pod_mode
   214:       self.mode = 'code'
   215:       return
   216: 
   217:     # Otherwise, just process the command
   218: 
   219:     if cmd == 'head1':
   220:       self.end_lists()
   221:       self.weaver.head(1+self.heading_level_offset, pod.group(2))
   222: 
   223:     elif cmd == 'head2':
   224:       self.end_lists()
   225:       self.weaver.head(2+self.heading_level_offset, pod.group(2))
   226: 
   227:     elif cmd == 'over':
   228:       # list of unknown type pending, wait for =item
   229:       self.pod_mode = 'list'
   230: 
   231:     elif cmd == 'back':
   232:       self.end_list_item()
   233:       self.end_list()
   234: 
   235:     elif cmd == 'item':
   236:       if not hasattr(self,'pod_mode'):
   237:         if verbosity >=2: print 'POD: item before over'
   238:         self.pod_mode = 'list'
   239:       key = pod.group(2)
   240:       key = string.strip(key)
   241:       if self.pod_mode == 'item':
   242:         self.end_list_item()
   243:       else:
   244:         self.pod_mode = 'item'
   245:         list_type = 'keyed'
   246:         if len(key)==1:
   247:           if key in '*+.-':
   248:             list_type = 'bullet'
   249:         self.begin_list(list_type)
   250:       if self.list_type[-1] == 'keyed':
   251:         # interscript doesn't support formatting of any kind
   252:         # in keyed list keys (because LaTeX doesn't)
   253:         # we need another kind of list (LaTeX can be given one)
   254:         # For now, we remove any X<...> stuff
   255:         stripkey = ''
   256:         tail = key
   257:         match = self.esc_re.match(tail)
   258:         while match:
   259:           pre, cmd, tail = match.group(1,2,3)
   260:           stripkey = stripkey + pre
   261:           match = self.esc_re.match(tail)
   262:         if tail: stripkey = stripkey + tail
   263:         key = stripkey
   264: 
   265:       self.begin_list_item(key)
   266: 
   267:     elif cmd == 'for':
   268:       self.weaver.rawif(pod.group(2))
   269:       self.pod_mode = 'litpar'
   270:     elif cmd == 'begin':
   271:       self.weaver.rawif(pod.group(2))
   272:       self.pod_mode = 'lit'
   273:     elif cmd == 'end':
   274:       self.weaver.enable()
   275:       self.weaver.translate()
   276:       self.weaver.pod_mode = ''
   277: 
   278:   def write_comment(self,line):
   279:     self._writeline('# '+line)
   280: 
   281:   def start_section(self, file, count):
   282:     data = '#line '+str(count)+' '+'"'+file+'"'
   283:     self._writeline(data)
   284:     self.weaver.echotangle(self.sink.lines_written,data)
   285: 
   286:   def get_comment_tangler(self):
   287:     return hash_comment_tangler(self.sink,weaver, '# ')
   288: 
   289:   def get_string_tangler(self,eol,width):
   290:     # This is _wrong_ and needs to be fixed!
   291:     return c_string_tangler(self.sink,self.get_weaver(),eol,width)
   292: 
   293: 
End python section to interscript/tanglers/perl.py[1]