Macros for SAS Application Developers
https://github.com/sasjs/core
mp_cleancsv.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Fixes embedded cr / lf / crlf in CSV
4 @details CSVs will sometimes contain lf or crlf within quotes (eg when
5 saved by excel). When the termstr is ALSO lf or crlf that can be tricky
6 to process using SAS defaults.
7 This macro converts any csv to follow the convention of a windows excel file,
8 applying CRLF line endings and converting embedded cr and crlf to lf.
9
10 Usage:
11
12 fileref mycsv "/path/your/csv";
13 %mp_cleancsv(in=mycsv,out=/path/new.csv)
14
15 @param in= (NOTPROVIDED) Provide path or fileref to input csv. If a period is
16 found, it is assumed to be a file.
17 @param out= (NOTPROVIDED) Output path or fileref to output csv. If a period
18 is found, it is assumed to be a file.
19 @param qchar= ('22'x) Quote char - hex code 22 is the double quote.
20
21 @version 9.2
22 @author Allan Bowe
23 @cond
24**/
25
26%macro mp_cleancsv(in=NOTPROVIDED,out=NOTPROVIDED,qchar='22'x);
27%if "&in"="NOTPROVIDED" or "&out"="NOTPROVIDED" %then %do;
28 %put %str(ERR)OR: Please provide valid input (&in) & output (&out) locations;
29 %return;
30%end;
31
32/* presence of a period(.) indicates a physical location */
33%if %index(&in,.) %then %let in="&in";
34%if %index(&out,.) %then %let out="&out";
35
36/**
37 * convert all cr and crlf within quotes to lf
38 * convert all other cr or lf to crlf
39 */
40 data _null_;
41 infile &in recfm=n ;
42 file &out recfm=n;
43 retain isq iscrlf 0 qchar &qchar;
44 input inchar $char1. ;
45 if inchar=qchar then isq = mod(isq+1,2);
46 if isq then do;
47 /* inside a quote change cr and crlf to lf */
48 if inchar='0D'x then do;
49 put '0A'x;
50 input inchar $char1.;
51 if inchar ne '0A'x then do;
52 put inchar $char1.;
53 if inchar=qchar then isq = mod(isq+1,2);
54 end;
55 end;
56 else put inchar $char1.;
57 end;
58 else do;
59 /* outside a quote, change cr and lf to crlf */
60 if inchar='0D'x then do;
61 crblank:
62 put '0D0A'x;
63 input inchar $char1.;
64 if inchar='0D'x then do;
65 /* multiple CR indicates CR formatted file with blank lines */
66 goto crblank;
67 end;
68 else if inchar ne '0A'x then do;
69 put inchar $char1.;
70 if inchar=qchar then isq = mod(isq+1,2);
71 end;
72 end;
73 else if inchar='0A'x then put '0D0A'x;
74 else put inchar $char1.;
75 end;
76 run;
77%mend mp_cleancsv;
78/** @endcond */