#!/usr/bin/perl
# -*- mode: cperl; coding: utf-8; -*-
#
# Builds the post answering "Why do I have to say LINE SEQUENTIAL in my Cobol
# SELECT statements?" by handing the page content to post().
#
# The markup helpers called below (post, header, footer, p, h3, ul, css,
# table, trow, td) are not defined in this file; presumably they are supplied
# by common.pm, which is loaded with do() -- confirm against common.pm.
# The ⊂...⊃ and «...» markers in the text are passed through unchanged here;
# presumably the helpers turn them into markup -- confirm against common.pm.

# `do FILE` parses like a named unary operator, so the `||` below applies to
# the value returned by do(), not to the file name: the script dies when
# common.pm cannot be read, fails to compile, or returns a false value.
use strict; use warnings; use utf8; use lib "/h/hamren/src/post/lib", "."; my $rval = do "common.pm" || die "$0: common.pm failed ($!) [$@]";
#--- Single-line common initializer
#--- End of header

#https://en.wikipedia.org/wiki/EBCDIC
#https://en.wikipedia.org/wiki/EBCDIC_1047

# NOTE(review): $jcl is not referenced anywhere in the visible part of this file.
my $jcl = "«JCL»";

post(
    header(),

    p("This post is an extremely long answer to the question \"Why do I have to say LINE SEQUENTIAL in my Cobol SELECT statements?\"."),

    p("The target audience are programmers who are used to the Windows and UNIX file systems, ",
      " but do not know anything about the filesystems on IBM's mainframe operating system z/OS."),

    h3("The UNIX and Windows file systems"),

    p("All files are byte-oriented. ",
      " The smallest unit that can be read or written is one byte.",
      " There is no additional structure to files;",
      " a program can read or write any number of bytes at any offset in the file."),

    p(" The OS or an application may impose a structure on a file, but the file system does not know about it. ",
      " And what it does not know, it can not enforce. "),

    h3("The native z/OS file systems"),

    p("The OS that runs on IBM mainframes is called z/OS, a rebranding of earlier OS/390.",
      " It has its own file system that is very different from what UNIX and Windows have.",
      " It does not seem to have a name, but I will call it z/FS."),

    # NOTE(review): this paragraph may conflate IBM's z/OS zFS with the
    # Sun/OpenZFS file system that is available on Linux -- confirm the facts.
    p(" Do not confuse z/FS with ZFS.",
      " ZFS is an IBM implementation of the UNIX file system. ",
      " It is available on several platforms, including Linux. "),

    p("On z/FS (N.B. not z/OS) almost all files, including text files, are record-oriented. ",
      " Files have either a fixed record size, or a variable record size. ",
      " The smallest unit that can be read or written is one record. ",
      " It is not possible to read or write a part of a record. "),

    p("In this text, all z/FS files are assumed to be record-structured."),

    p("z/OS also provides a UNIX file system, called HFS (Hierarchical File System).",
      " HFS behaves just as any other UNIX file system."),

    h3("What is a text file?"),

    p("The phrase ⊂text file⊃ is not a well defined technical term; it is more of a convention.",
      " By ⊂text file⊃ we usually mean: ",
      ul(" A file that contains mainly printable characters.",
         " A file that is divided into lines.",
         " A file that can be edited in a text editor.")),

    p("If a character is printable or not depends on the character set used.",
      " On UNIX and Windows the character set is either the ASCII character set,",
      " or some character set derived from ASCII.",
      " On z/OS it is some variant of the EBCDIC character set."),

    p("On UNIX and Windows file systems, text files are stored as sequences of bytes.".
      " There is a ⊂line terminator⊃ after each line. ",
      " The ⊂line terminator⊃ is one or two bytes. ",
      " The byte value of the line terminator depends on both the file system type and the host character set."),

    p("On z/FS, the native z/OS file system, text files are stored as records. ".
      " Each record is one line of text.".
      " There are no line terminators."),

    # Styling for the line-terminator overview table (class "a").
    css(<<'EOF'),
table.a, table.a td { border-width: 0px; padding: 0px; }
table.a { border: solid black; border-width: 2px 0px 2px 0px; width: auto; }
table.a td:first-child { padding-left: 0pt; }
table.a td { white-space: nowrap; padding-left: 2em; width: 3.5em; }
table.a td.l { width: auto; white-space: normal;}
table.a tr:first-child td { font-weight: bold; }
EOF

    table({class=>'a'},
        trow(td("OS"),      td("File system"), td("Char. set"), td("Line term."), td({class=>'l'}, "Comment")),
        trow(td("Windows"), td("FAT, NTFS"),   td("ASCII"),     td("0x0D 0x0A"),  td({class=>'l'}, "Carriage return followed by line feed.")),
        trow(td("UNIX"),    td("UNIX"),        td("ASCII"),     td("0x0A"),       td({class=>'l'}, "Line feed, ASCII encoding.")),
        trow(td("z/OS"),    td("z/FS"),        td("EBCDIC"),    td("none"),       td({class=>'l'}, "Assuming record-oriented files.")),
        trow(td("z/OS"),    td("UNIX"),        td("EBCDIC"),    td("0x15"),       td({class=>'l'}, "Line feed, EBCDIC encoding."))
    ),

    p("As programmers we must be aware of the target file system.",
      " On UNIX file systems we should provide and expect LF at the end of lines.",
      " On Windows file systems we should provide and expect CR/LF at the end of lines.",
      " And on z/FS we should neither provide nor expect a line terminator."),

    h3("How to do it in Cobol"),

    p("This is where we use «SELECT ... LINE SEQUENTIAL». ",
      " It tells Cobol that the file is on a byte-oriented file system with line terminators.",
      " The Cobol runtime will then do the right thing. "),

    p("Without «LINE SEQUENTIAL» line terminators are neither expected nor provided. ",
      " This works well with record-oriented files on z/FS.",
      " But on byte-oriented files output will be one long line.",
      " If the Cobol record is 77 bytes, then exactly 77 bytes will be written, and exactly 77 bytes will be read."),

    # Styling for the write-mode summary table (class "b").
    css(<<'EOF'),
table.b { border: solid black; border-width: 2px 0px 2px 0px; }
table.b * { border: 0px; }
table.b td.n { white-space: nowrap; width: 8em; }
td.grey { background-color: #CCC; }
EOF

    p("Here is a summary of writing in different cases. Reading works in a similar way."),

    # The two grey cells use rowspan=>2, so they cover both the "UNIX" row and
    # the "z/OS + HFS" row; that second row therefore has only its label cell.
    table({class=>'b'},
        trow(td({class=>'n'}, "Write."),
             td("«SELECT ... LINE SEQUENTIAL»"),
             td("«SELECT ... SEQUENTIAL»")),
        trow(td({class=>'grey n'}, "UNIX"),
             td({class=>'grey', rowspan=>2}, "Text mode write. The whole record is written, including trailing spaces. A line feed character is appended. The byte value of the newline character depends on the character set, see table above."),
             td({class=>'grey', rowspan=>2}, "Binary mode write. The whole record is written, but no line feed is appended. ")),
        trow(td({class=>'grey n'}, "z/OS + HFS")),
        trow(td({class=>'n'}, "z/OS + z/FS"),
             td("This is not allowed, and will result in a runtime error."),
             td("Record oriented binary write. One record is written."))
    ),

    p("It is unfortunate that z/FS does not simply ignore «LINE SEQUENTIAL»."),

    h3("Final notes."),

    p("Saying that a record-oriented z/FS file can only be read and written record-by-record is not strictly true. ",
      " It is possible to trick z/OS into reading such a file as a byte stream. ",
      " One way is to write a C program that a) opens the file in binary mode, and b) runs on the UNIX subsystem. "),

    p(" Yes, there is a complete UNIX subsystem in z/OS that can compile and run many UNIX programs.",
      " It is possible to ssh to the UNIX subsystem for shell access.",
      " Even Emacs and other open source software is available, but not installed by default."),

    p(" The UNIX subsystem is a set of z/OS routines that implements UNIX system calls. ",
      " With the system calls in place, libraries can be ported, as well as programs.",
      " This is similar to Cygwin, the UNIX emulation layer for Windows, except that here it is part of the OS."),

    footer()
);

# Nothing below __END__ is compiled by perl; the text there looks like an
# earlier draft of this post that was kept for reference.
__END__

    p("In this article, UNIX refers to any UNIX-like OS, including Linux and macOS."),

    h3("The UNIX and Windows file systems"),

    p("All files are byte streams. There is no additional structure to the files as the OS provides them. ",
      " Individual programs can impose a structure on files, but that is not something that the OS knows about.",
      " A program can read or write any number of bytes at any offset in the file.",
      " If a file is meant to hold fixed-length records of 100 bytes, it is still possible to read 67 bytes at offset 123.",
      " It is not that the OS does not care. It really does not know. 
", " At the lowest level, there is no information to say that the file contains records,", " and that a program should only read or write whole records."), p("Specifically, unlike z/OS, the UNIX and Windows do not provide a file type to act as a database."), p("Of course, there are many file types that parts of the OS knows about, such as executable files.", " Without that knowledge the OS would work. ", " An executable definitely has a well-defined structure, but it is still possible for a program to read or write any part of the file.", " A program may break an executable file, or any other type of file, by writing to it without respect for the file structure. ", " The file structure is just a convention. "), p("This may all be obvious, but as we shall see, other operating systems have other ideas. "), p("UNIX and Windows are similar also in that they have directories arranged in a hierarchy."), h3("The native z/OS file systems"), p("The OS that runs on IBM mainframes is now called z/OS, a rebranding of earlier OS/390.", "It has its own file system, that I will call z/FS, that is very different from what UNIX and Windows have.", " A few things stand out:" ), ul("There is no directory structure. To the OS user, it looks as if all files reside in a single directory.", "There are many types of files.". " Almost all files are divides into records of either fixed or variable length.", "A file may have one or more keys, maintained by the the OS, mych like an SQL table.". " Records can be found using one or more keys, and read in order by one or more keys.". " These files act as databases.", "The IBM word for a file is a \"data set\".", "A special type of file, called a PDS (Partitioned Data Set), can contains \"element\" that act almost like a non-PDS file. ". " Only records-structured files without keys may be members. In particular, a PDS can not be a member of a PDS. ". 
" Think of a PDS as a directory with only one level."), p("A file name must be unique within a volume, the z/OS name for a partition. ", "File names can be 48 characters long. ", "They are divides into smaller parts separated by periods.", " These smaller parts may be at most 8 characters long.", " The convention is that users only create files that start with user iser identity, which is eight characters or less."), p("Here are some possible files belonging to programmer JOE."), source_codeq(<<'EOF'), JOE.CL.CLIST JOE.ISPF.ISPPROF JOE.LIB.JCL JOE.SRC.C JOE.SRC.COB JOE.SRC.COB.DATA.FB128 JOE.SRC.COB.DATA.FB80 JOE.SRC.COB.JCL EOF p("All of these are likely to be of type PDS.", " A file like «JOE.SRC.C» can contain a number of file, probably C source files judging from the name."), p("The parameters of a file is determined when it is created.", " Is the file record-oriented or not?", " Is the file a PDS or not?", " Are records of fixed or variable length?"), p("Most parameters, including the ones just mentioned, can not be changes once a file is created.", " To change a parameter, a new file must be created and the contents of the old file copied to the new file."), p("And this is just for starters.", " There are quite a few different types of files, a a large number of parameters.", " Mastering z/FS is no mean feat."), p("z/OS also privides a UNIX file system, called HFS (Hierarchical File System).", " The UNIX file system does of course not provide record-structured files,", " but can be used for most text files and any kind of raw binary file, such as those used by an database manager."), h3("Text files"), p("What is a text file?", " Neither UNIX, Windows or z/OS provides a file type that is a \"text file\".", " Well, a text file is any file that you can edit in a text editor, or print on an old-fashioned printer. ", " On UNIX, by convention there is a Line-Feed character between lines. 
", " On Windows the convention is a Carriage-Return and a Line-Feed.", " On z/FS one line is one record, and no separator is necessary."), p("Note that the above line says z/FS, not z/OS. If you have a text file on a UNIX file system on z/OS,", " then the UNIX convention applies.", " How text files are represented depends on the file system, not on the OS."), p("This applies for any case when you use a file system from on OS on another OS. ", " If you mount an NTFS file system on a UNIX machine, then the Windows convention, applies. ", " I say Windows convention simply because this convention applies to all file systems from Microsoft.", " This convention was in place already in MS/DOS. "), p("A program that works with text files must handle these convention somehow.", " Usually it is left to runtime libraries, but they have to be told if an input or output file is a text file or a binary file. ", " In C and many other languages a flags is passed when opening a file. ", " If the flags says \"text file\", then the runtime library will apply the conventions of the file system. "), p("Some language, like Cobol, which comes from the IBM world, expect a file to be binary.", " Writing a text line means writing a record, and no line delimiter is necessary. ", " But what happens when the program is run on UNIX or Windows?", " Cobol will by default not add a line separator.", " Writing ten lines, ten records, will result in a single long line.", " This is the right thing if the data is binary, which Cobol assumes."), p("To tell Cobol the handle line delimiters, we need to add an «LINE SEQUENTIAL» clause to our «SELECT» statement.", " When we do, the Cobol runtime will add a line delimiter when writing a file.", " When reading a file, the runtime will expect a line delimiter; that line delimiter will be stripped away.", " Line delimiters are thus transparent to the program. "), p("The main disadvantage is that we have to modify the source to fit the target file system. 
", " z/OS Cobol does not allow us to say «LINE SEQUENTIAL», at least not for a file residing on z/FS."), table({ style => 'width: auto' }, trow( td("4_"), td("SP"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("¢"), td("." ), td("<"), td("("), td("+"), td("|" ) ), trow( td("5_"), td("&" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("!" ), td("\$"), td("*"), td(")"), td(";"), td("¬") ), trow( td("6_"), td("-" ), td("/"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("¦"), td("," ), td("%"), td("_"), td(">"), td("?" ) ), trow( td("7_"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("`"), td(":" ), td("#" ), td("@"), td("'"), td("="), td("\"" ) ), trow( td("8_"), td("" ), td("a"), td("b"), td("c" ), td("d"), td("e"), td("f"), td("g"), td("h"), td("i"), td("" ), td("" ), td("" ), td("" ), td("" ), td("±") ), trow( td("9_"), td("" ), td("j"), td("k"), td("l" ), td("m"), td("n"), td("o"), td("p"), td("q"), td("r"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ) ), trow( td("A_"), td("" ), td("~"), td("s"), td("t" ), td("u"), td("v"), td("w"), td("x"), td("y"), td("z"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ) ), trow( td("B_"), td("^" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ), td("[" ), td("]" ), td("" ), td("" ), td("" ), td("" ) ), trow( td("C_"), td("{" ), td("A"), td("B"), td("C" ), td("D"), td("E"), td("F"), td("G"), td("H"), td("I"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ) ), trow( td("D_"), td("}" ), td("J"), td("K"), td("L" ), td("M"), td("N"), td("O"), td("P"), td("Q"), td("R"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ) ), trow( td("E_"), td("\\"), td("" ), td("S"), td("T" ), td("U"), td("V"), td("W"), td("X"), td("Y"), td("Z"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ) ), trow( td("F_"), td("0" ), td("1"), td("2"), td("3" ), 
td("4"), td("5"), td("6"), td("7"), td("8"), td("9"), td("" ), td("" ), td("" ), td("" ), td("" ), td("" ) ) ), footer() );