Production Ready Macros for SAS Application Developers
https://github.com/sasjs/core
mp_cleancsv.sas
Go to the documentation of this file.
/**
  @file mp_cleancsv.sas
  @brief Fixes embedded cr / lf / crlf in CSV
  @details CSVs will sometimes contain lf or crlf within quotes (eg when
  saved by excel).  When the termstr is ALSO lf or crlf that can be tricky
  to process using SAS defaults.
  This macro converts any csv to follow the convention of a windows excel file,
  applying CRLF line endings and converting embedded cr and crlf to lf.

  The file is processed as a byte stream (recfm=n), one byte per data step
  iteration:

  @li inside quotes:  cr, crlf and lf are each written as lf
  @li outside quotes: cr, crlf and lf are each written as crlf
  @li consecutive cr bytes (blank lines in a cr-terminated file) are each
    treated as a separate line ending

  Usage:

      filename mycsv "/path/your/csv";
      %mp_cleancsv(in=mycsv,out=/path/new.csv)

  @param in= provide path or fileref to input csv.  A value containing a
    period (.) is treated as a physical path, anything else as a fileref.
  @param out= output path or fileref to output csv (same rule as in=)
  @param qchar= quote char - hex code 22 is the double quote.

  @version 9.2
  @author Allan Bowe
  @cond
**/

%macro mp_cleancsv(in=NOTPROVIDED,out=NOTPROVIDED,qchar='22'x);
%if "&in"="NOTPROVIDED" or "&out"="NOTPROVIDED" %then %do;
  %put %str(ERR)OR: Please provide valid input (&in) and output (&out) locations;
  %return;
%end;

/* presence of a period(.) indicates a physical location.  The value is    */
/* masked for the test so a comma in a path is not seen as an extra        */
/* argument to the index function.                                         */
%if %index(%superq(in),.) %then %let in="&in";
%if %index(%superq(out),.) %then %let out="&out";

/**
 * convert all cr and crlf within quotes to lf
 * convert all other cr or lf to crlf
 */
data _null_;
  infile &in recfm=n;
  file &out recfm=n;
  /* isq:    1 while the current position is inside a quoted field         */
  /* prevcr: 1 when the previous byte was a cr whose line ending has       */
  /*         already been written (so a following lf must be dropped)      */
  retain isq prevcr 0 qchar &qchar;
  input inchar $char1.;

  if prevcr and inchar='0A'x then do;
    /* second half of a crlf pair - the line ending is already written */
    prevcr=0;
  end;
  else do;
    prevcr=(inchar='0D'x);
    /* toggle BEFORE writing so an opening quote counts as inside and a */
    /* closing quote counts as outside (either way it is written as-is) */
    if inchar=qchar then isq=not isq;
    if inchar='0D'x or inchar='0A'x then do;
      if isq then put '0A'x;   /* embedded line break -> lf  */
      else put '0D0A'x;        /* record terminator -> crlf  */
    end;
    else put inchar $char1.;
  end;
run;
%mend;
/** @endcond */