Production Ready Macros for SAS Application Developers
https://github.com/sasjs/core
mp_csv2ds.sas
Go to the documentation of this file.
1 /**
2  @file mp_csv2ds.sas
3  @brief Efficient import of arbitrary CSV using a dataset as template
4  @details Used to import relevant columns from a large CSV using
5  a dataset to provide the types and lengths. Assumes that a header
6  row is provided, and datarows start on line 2. Extra columns in
7  both the CSV and base dataset are ignored.
8 
9  Usage:
10 
11  filename mycsv temp;
12  data _null_;
13  file mycsv;
14  put 'name,age,nickname';
15  put 'John,48,Jonny';
16  put 'Jennifer,23,Jen';
17  run;
18 
19  %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
20 
21 
22  @param inref= fileref to the CSV
23  @param outds= output ds (lib.ds format)
24  @param view= Set to YES or NO to determine whether the output should be
25  a view or not. Default is NO (not a view).
26  @param baseds= Template dataset on which to create the input statement.
27  Is used to determine types, lengths, and any informats.
28 
29  @version 9.2
30  @author Allan Bowe
31 
32  <h4> Dependencies </h4>
33  @li mp_abort.sas
34  @li mf_existds.sas
35 
36 **/
37 
38 %macro mp_csv2ds(inref=0,outds=0,baseds=0,view=NO);
39 
40 %mp_abort(iftrue=( &inref=0 )
41  ,mac=&sysmacroname
42  ,msg=%str(the INREF variable must be provided)
43 )
44 %mp_abort(iftrue=( %superq(outds)=0 )
45  ,mac=&sysmacroname
46  ,msg=%str(the OUTDS variable must be provided)
47 )
48 %mp_abort(iftrue=( &baseds=0 )
49  ,mac=&sysmacroname
50  ,msg=%str(the BASEDS variable must be provided)
51 )
52 %mp_abort(iftrue=( &baseds=0 )
53  ,mac=&sysmacroname
54  ,msg=%str(the BASEDS variable must be provided)
55 )
56 %mp_abort(iftrue=( %mf_existds(&baseds)=0 )
57  ,mac=&sysmacroname
58  ,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist)
59 )
60 
61 /* count rows */
62 %local hasheader; %let hasheader=0;
63 data _null_;
64  if _N_ > 1 then do;
65  call symputx('hasheader',1,'l');
66  stop;
67  end;
68  infile &inref;
69  input;
70 run;
71 %mp_abort(iftrue=( &hasheader=0 )
72  ,mac=&sysmacroname
73  ,msg=%str(No header row in &inref)
74 )
75 
76 /* get the variables in the CSV */
77 data _data_;
78  infile &inref;
79  input;
80  length name $32;
81  do i=1 to countc(_infile_,',')+1;
82  name=upcase(scan(_infile_,i,','));
83  output;
84  end;
85  stop;
86 run;
87 %local csv_vars;%let csv_vars=&syslast;
88 
89 /* get the variables in the dataset */
90 proc contents noprint data=&baseds
91  out=_data_ (keep=name type length format: informat);
92 run;
93 %local base_vars; %let base_vars=&syslast;
94 
95 proc sql undo_policy=none;
96 create table &csv_vars as
97  select a.*
98  ,b.type
99  ,b.length
100  ,b.format
101  ,b.formatd
102  ,b.formatl
103  ,b.informat
104  from &csv_vars a
105  left join &base_vars b
106  on a.name=upcase(b.name)
107  order by i;
108 
109 /* prepare the input statement */
110 %local instat dropvars;
111 data _null_;
112  set &syslast end=last;
113  length in dropvars $32767;
114  retain in dropvars;
115  if missing(type) then do;
116  informat='$1.';
117  dropvars=catx(' ',dropvars,name);
118  end;
119  else if missing(informat) then do;
120  if type=1 then informat='best.';
121  else informat=cats('$',length,'.');
122  end;
123  else informat=cats(informat,'.');
124  in=catx(' ',in,name,':',informat);
125  if last then do;
126  call symputx('instat',in,'l');
127  call symputx('dropvars',dropvars,'l');
128  end;
129 run;
130 
131 /* import the CSV */
132 data &outds
133  %if %upcase(&view)=YES %then %do;
134  /view=&outds
135  %end;
136  ;
137  infile &inref dsd firstobs=2;
138  input &instat;
139  %if %length(&dropvars)>0 %then %do;
140  drop &dropvars;
141  %end;
142 run;
143 
144 %mend;