Macros for SAS Application Developers
https://github.com/sasjs/core
mp_csv2ds.sas
Go to the documentation of this file.
1/**
2 @file mp_csv2ds.sas
3 @brief Efficient import of arbitrary CSV using a dataset as template
4 @details Used to import relevant columns from a large CSV using
5 a dataset to provide the types and lengths. Assumes that a header
6 row is provided, and datarows start on line 2. Extra columns in
7 both the CSV and base dataset are ignored.
8
9 Usage:
10
11 filename mycsv temp;
12 data _null_;
13 file mycsv;
14 put 'name,age,nickname';
15 put 'John,48,Jonny';
16 put 'Jennifer,23,Jen';
17 run;
18
19 %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
20
21
22 @param inref= fileref to the CSV
23 @param outds= output ds (lib.ds format)
24 @param view= Set to YES or NO to determine whether the output should be
25 a view or not. Default is NO (not a view).
26 @param baseds= Template dataset on which to create the input statement.
27 Is used to determine types, lengths, and any informats.
28
29 @version 9.2
30 @author Allan Bowe
31
32 <h4> SAS Macros </h4>
33 @li mp_abort.sas
34 @li mf_existds.sas
35
36**/
37
38%macro mp_csv2ds(inref=0,outds=0,baseds=0,view=NO);
39
40%mp_abort(iftrue=( &inref=0 )
41 ,mac=&sysmacroname
42 ,msg=%str(the INREF variable must be provided)
43)
44%mp_abort(iftrue=( %superq(outds)=0 )
45 ,mac=&sysmacroname
46 ,msg=%str(the OUTDS variable must be provided)
47)
48%mp_abort(iftrue=( &baseds=0 )
49 ,mac=&sysmacroname
50 ,msg=%str(the BASEDS variable must be provided)
51)
52%mp_abort(iftrue=( %mf_existds(&baseds)=0 )
53 ,mac=&sysmacroname
54 ,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist)
55)
56
57/* count rows */
58%local hasheader; %let hasheader=0;
59data _null_;
60 if _N_ > 1 then do;
61 call symputx('hasheader',1,'l');
62 stop;
63 end;
64 infile &inref;
65 input;
66run;
67%mp_abort(iftrue=( &hasheader=0 )
68 ,mac=&sysmacroname
69 ,msg=%str(No header row in &inref)
70)
71
72/* get the variables in the CSV */
73data _data_;
74 infile &inref;
75 input;
76 length name $32;
77 do i=1 to countc(_infile_,',')+1;
78 name=upcase(scan(_infile_,i,','));
79 output;
80 end;
81 stop;
82run;
83%local csv_vars;%let csv_vars=&syslast;
84
85/* get the variables in the dataset */
86proc contents noprint data=&baseds
87 out=_data_ (keep=name type length format: informat);
88run;
89%local base_vars; %let base_vars=&syslast;
90
91proc sql undo_policy=none;
92create table &csv_vars as
93 select a.*
94 ,b.type
95 ,b.length
96 ,b.format
97 ,b.formatd
98 ,b.formatl
99 ,b.informat
100 from &csv_vars a
101 left join &base_vars b
102 on a.name=upcase(b.name)
103 order by i;
104
105/* prepare the input statement */
106%local instat dropvars;
107data _null_;
108 set &syslast end=last;
109 length in dropvars $32767;
110 retain in dropvars;
111 if missing(type) then do;
112 informat='$1.';
113 dropvars=catx(' ',dropvars,name);
114 end;
115 else if missing(informat) then do;
116 if type=1 then informat='best.';
117 else informat=cats('$',length,'.');
118 end;
119 else informat=cats(informat,'.');
120 in=catx(' ',in,name,':',informat);
121 if last then do;
122 call symputx('instat',in,'l');
123 call symputx('dropvars',dropvars,'l');
124 end;
125run;
126
127/* import the CSV */
128data &outds
129 %if %upcase(&view)=YES %then %do;
130 /view=&outds
131 %end;
132 ;
133 infile &inref dsd firstobs=2;
134 input &instat;
135 %if %length(&dropvars)>0 %then %do;
136 drop &dropvars;
137 %end;
138run;
139
140%mend mp_csv2ds;