COBOL Copybook PIC parser update

This is a replacement for the PIC parser class used in a previous post. It can also be run from the command line, for example:

$ ./pic.py
Usage: pic.py [OPTIONS] PIC-string

Options:
  --version             show program's version number and exit
  -h, --help            show this help message and exit
  -c COMP, --comp=COMP  specify computational (COMP) value
  --test                evaluate and display sample PIC strings

$ ./pic.py -c1 '99'
data-type: 'Float', data-length: 2, implied-dec-pos: 0

$ ./pic.py '9(3)V99'
data-type: 'Float', data-length: 5, implied-dec-pos: 4

$ ./pic.py 'X(13)'
data-type: 'Char', data-length: 13, implied-dec-pos: 0

pic.py

Note: This code has been superseded by the pyCOBOL package; updated code is available at SourceForge.

import re
import sys
from optparse import *
"""
COBOL Copybook PIC parser
=========================
Parses COBOL Copybook Picture strings
used by copybook_parser.py

:date: 2010-05-21
:version: 0.8
"""

class PIC:
    """
    COBOL Copybook PIC (picture) string parser.

    :Supports:

    - **X** : Literal
    - **S** : Signed (+/-)
    - **9** : Digit
    - **V** : Implied Decimal
    - **Z** : Zero-Padded

    `parse` also accepts the lowercase forms ``x``, ``s``, ``v`` and ``z``
    and treats them as their uppercase equivalents.

    :Ignores:

    - **VALUES**
    - **EE** (scientific notation)
    """

    def __init__(self):
        """
        Compiles the regular expressions used by `parse` and stores them
        in ``self.PIC_RE`` under the keys 'repeats', 'float' and 'int'.
        """
        patterns = {
            'repeats': r'(?P<char>[9ZX])\((?P<num>\d+)\)',
            'float': r'S?[9Z]+[.V][9Z]*',
            'int': r'S?[9Z]+'
        }
        self.PIC_RE = {}
        for key, pattern in patterns.items():
            self.PIC_RE[key] = re.compile(pattern)

    def expand_repeat_chars(self, PIC, REPEAT_RE):
        """
        Expands all characters in picture string followed by '(i)' where
        'i' is an integer. Example: '9(6)' will be expanded to '999999'.

        :type PIC: string
        :param PIC: COBOL Copybook Picture string

        :type REPEAT_RE:  re object
        :param REPEAT_RE: RegEx that returns two (2) matching groups...

          1. `char` character to repeat
          2. `num`  number of times to repeat character

        :rtype: string
        :returns: PIC string after expansion
        """
        while True:
            match = REPEAT_RE.search(PIC)
            if match is None:
                return PIC
            expansion = match.group('char') * int(match.group('num'))
            # Splice by position instead of REPEAT_RE.sub() so the expanded
            # text is never interpreted as a replacement template.
            PIC = PIC[:match.start()] + expansion + PIC[match.end():]

    def parse(self, PIC, COMP=0):
        """
        Parses COBOL Copybook PIC (picture) string

        :type PIC: string
        :param PIC: COBOL Copybook Picture string. Lowercase ``s``, ``x``,
          ``v`` and ``z`` are treated as their uppercase equivalents.

        :type COMP: integer
        :param COMP: Computational value (COMP-#)

          0. Auto
          1. Float
          2. Double
          3. BCD

          When the picture contains a '.' or 'V' the type is forced to
          'Float' ('Double' for COMP-2) and any other COMP value is ignored.

        :rtype: tuple
        :returns:

          0. data-type
          1. data-length
          2. implied-decimal-position

        :raises IndexError: if COMP selects the data-type and is not one
          of the values listed above.

        :data-types:
        'Char', 'Integer', 'Float', 'Double' or 'BCD'

        :data-length:
        Length of the expanded picture string. An implied decimal ('V') is
        not counted; an explicit '.' is.

        :implied-decimal-position:
        Unit-based (1st char = position #1). This is the position where
        a decimal will be inserted after the data string is retrieved.
        It is 0 when the picture has no 'V'.
        """
        TYPES = ['Integer', 'Float', 'Double', 'BCD']
        type_, decimal_pos = 'Char', 0
        # COBOL treats these lowercase picture symbols as equivalent to
        # their uppercase forms; normalise them so the (uppercase-only)
        # patterns below recognise them.
        for symbol in 'sxvz':
            PIC = PIC.replace(symbol, symbol.upper())
        PIC = self.expand_repeat_chars(PIC, self.PIC_RE['repeats'])
        if self.PIC_RE['float'].match(PIC):
            # Force to Float or Double if PIC contains '.' or 'V'
            type_ = 'Double' if COMP == 2 else 'Float'
            if 'V' in PIC:
                decimal_pos = PIC.index('V') + 1
                # 'V' occupies no storage, so drop it before measuring.
                PIC = PIC.replace('V', '')
        elif COMP:
            comp = int(COMP)
            # A negative value would otherwise wrap around TYPES and
            # silently select the wrong data-type.
            if not 0 <= comp < len(TYPES):
                raise IndexError('unsupported COMP value: %r' % (COMP,))
            type_ = TYPES[comp]
        elif self.PIC_RE['int'].match(PIC):
            type_ = 'Integer'
        return (type_, len(PIC), decimal_pos)

# Command-line entry point: parse a single PIC string given as the first
# positional argument and/or run the doctests stored in pic_tests.py.
if __name__ == '__main__':
    ver = 'Copybook PIC Parser ver 0.8'
    # %prog is expanded by optparse to the name the script was invoked as,
    # so the usage line always shows the real program name.
    usage = '%prog [OPTIONS] PIC-string'
    parser = OptionParser(usage, version=ver)
    # type='int' lets optparse reject a non-numeric COMP value with a
    # usage error instead of an uncaught ValueError traceback.
    parser.add_option('-c', '--comp', dest='comp', type='int', default=0,
        help='specify computational (COMP) value')
    parser.add_option('--test', action='store_true',
        help="module unit tests")
    (options, args) = parser.parse_args()
    if args:
        # Strip surrounding blanks and quote characters from the argument
        # before parsing it.
        sys.stdout.write(
            'data-type: %r, data-length: %r, implied-dec-pos: %r\n' %
            PIC().parse(args[0].strip(' \'"'), options.comp))
    if options.test:
        import doctest
        # Parenthesised form is valid under both Python 2 and Python 3.
        print(doctest.testfile('pic_tests.py'))
    if not args and not options.test:
        parser.print_help()

pic_tests.py

"""
>>> import sys
>>> from pic import PIC
>>> TESTS = [
...     ('S9(6)V99', 0), ('S9(6).99', 0), ('999', 0),
...     ('999', 1), ('999', 2), ('999', 3),
...     ('Z(2)9(6)V99', 2), ('9(6)V99', 3),
...     ('AX(5)-XXX', 0), ('9(5).99', 0)
... ]
>>> for test in TESTS:
...     if test[1]:
...         s = '%s COMP-%d\n' % test
...     else:
...         s = '%s\n' % test[0]
...     sys.stdout.write(
...         s + 'data-type: %r, data-length: %r, implied-dec-pos: %r\n' %
...         PIC().parse(test[0], test[1]))
...
S9(6)V99
data-type: 'Float', data-length: 9, implied-dec-pos: 8
S9(6).99
data-type: 'Float', data-length: 10, implied-dec-pos: 0
999
data-type: 'Integer', data-length: 3, implied-dec-pos: 0
999 COMP-1
data-type: 'Float', data-length: 3, implied-dec-pos: 0
999 COMP-2
data-type: 'Double', data-length: 3, implied-dec-pos: 0
999 COMP-3
data-type: 'BCD', data-length: 3, implied-dec-pos: 0
Z(2)9(6)V99 COMP-2
data-type: 'Double', data-length: 10, implied-dec-pos: 9
9(6)V99 COMP-3
data-type: 'Float', data-length: 8, implied-dec-pos: 7
AX(5)-XXX
data-type: 'Char', data-length: 10, implied-dec-pos: 0
9(5).99
data-type: 'Float', data-length: 8, implied-dec-pos: 0
>>>
"""

Related Posts

COBOL Copybook parser

Additional Resources

Reading COBOL layouts

Tags: , , ,

Leave a comment