Production Ready Macros for SAS Application Developers
https://github.com/sasjs/core
mp_cleancsv.sas
Go to the documentation of this file.
1 /**
2  @file mp_cleancsv.sas
3  @brief Fixes embedded cr / lf / crlf in CSV
4  @details CSVs will sometimes contain lf or crlf within quotes (eg when
5  saved by excel). When the termstr is ALSO lf or crlf that can be tricky
6  to process using SAS defaults.
7  This macro converts any csv to follow the convention of a windows excel file,
8  applying CRLF line endings and converting embedded cr and crlf to lf.
9 
10  usage:
11  fileref mycsv "/path/your/csv";
12  %mp_cleancsv(in=mycsv,out=/path/new.csv)
13 
14  @param in= provide path or fileref to input csv
15  @param out= output path or fileref to output csv
16  @param qchar= quote char - hex code 22 is the double quote.
17 
18  @version 9.2
19  @author Allan Bowe
20 **/
21 
22 %macro mp_cleancsv(in=NOTPROVIDED,out=NOTPROVIDED,qchar='22'x);
23 %if "&in"="NOTPROVIDED" or "&out"="NOTPROVIDED" %then %do;
24  %put %str(ERR)OR: Please provide valid input (&in) and output (&out) locations;
25  %return;
26 %end;
27 
28 /* presence of a period(.) indicates a physical location */
29 %if %index(&in,.) %then %let in="&in";
30 %if %index(&out,.) %then %let out="&out";
31 
32 /**
33  * convert all cr and crlf within quotes to lf
34  * convert all other cr or lf to crlf
35  */
36  data _null_;
37  infile &in recfm=n ;
38  file &out recfm=n;
39  retain isq iscrlf 0 qchar &qchar;
40  input inchar $char1. ;
41  if inchar=qchar then isq = mod(isq+1,2);
42  if isq then do;
43  /* inside a quote change cr and crlf to lf */
44  if inchar='0D'x then do;
45  put '0A'x;
46  input inchar $char1.;
47  if inchar ne '0A'x then do;
48  put inchar $char1.;
49  if inchar=qchar then isq = mod(isq+1,2);
50  end;
51  end;
52  else put inchar $char1.;
53  end;
54  else do;
55  /* outside a quote, change cr and lf to crlf */
56  if inchar='0D'x then do;
57  put '0D0A'x;
58  input inchar $char1.;
59  if inchar ne '0A'x then do;
60  put inchar $char1.;
61  if inchar=qchar then isq = mod(isq+1,2);
62  end;
63  end;
64  else if inchar='0A'x then put '0D0A'x;
65  else put inchar $char1.;
66  end;
67  run;
68 %mend;