Production Ready Macros for SAS Application Developers
https://github.com/sasjs/core
mp_ds2cards.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Create a CARDS file from a SAS dataset.
4 @details Uses dataset attributes to convert all data into datalines.
5 Running the generated file will rebuild the original dataset. Includes
6 support for large decimals, binary data, PROCESSED_DTTM columns, and
7 alternative encoding. If the input dataset is empty, the cards file will
8 still be created.
9
10 Additional support to generate a random sample and max rows.
11
12 Usage:
13
14 %mp_ds2cards(base_ds=sashelp.class
15 , tgt_ds=work.class
16 , cards_file= "C:\temp\class.sas"
17 , showlog=NO
18 , maxobs=5
19 )
20
21 TODO:
22 - labelling the dataset
23 - explicity setting a unix LF
24 - constraints / indexes etc
25
26 @param [in] base_ds= Should be two level - eg work.blah. This is the table
27 that is converted to a cards file.
28 @param [in] tgt_ds= Table that the generated cards file would create.
29 Optional - if omitted, will be same as BASE_DS.
30 @param [out] cards_file= ("%sysfunc(pathname(work))/cardgen.sas") Location in
31 which to write the (.sas) cards file
32 @param [in] maxobs= (max) To limit output to the first <code>maxobs</code>
33 observations, enter an integer here.
34 @param [in] random_sample= (NO) Set to YES to generate a random sample of
35 data. Can be quite slow.
36 @param [in] showlog= (YES) Whether to show generated cards file in the SAS
37 log. Valid values:
38 @li YES
39 @li NO
40 @param [in] outencoding= Provide encoding value for file statement (eg utf-8)
41 @param [in] append= (NO) If NO then will rebuild the cards file if it already
42 exists, otherwise will append to it. Used by the mp_lib2cards.sas macro.
43
44 <h4> Related Macros </h4>
45 @li mp_lib2cards.sas
46 @li mp_ds2inserts.sas
47 @li mp_mdtablewrite.sas
48
49 @version 9.2
50 @author Allan Bowe
51**/
52
53%macro mp_ds2cards(base_ds=, tgt_ds=
54 ,cards_file="%sysfunc(pathname(work))/cardgen.sas"
55 ,maxobs=max
56 ,random_sample=NO
57 ,showlog=YES
58 ,outencoding=
59 ,append=NO
60)/*/STORE SOURCE*/;
61%local i setds nvars;
62
63%if not %sysfunc(exist(&base_ds)) %then %do;
64 %put %str(WARN)ING: &base_ds does not exist;
65 %return;
66%end;
67
68%if %index(&base_ds,.)=0 %then %let base_ds=WORK.&base_ds;
69%if (&tgt_ds = ) %then %let tgt_ds=&base_ds;
70%if %index(&tgt_ds,.)=0 %then %let tgt_ds=WORK.%scan(&base_ds,2,.);
71%if ("&outencoding" ne "") %then %let outencoding=encoding="&outencoding";
72%if ("&append" = "" or "&append" = "NO") %then %let append=;
73%else %let append=mod;
74
75/* get varcount */
76%let nvars=0;
77proc sql noprint;
78select count(*) into: nvars from dictionary.columns
79 where upcase(libname)="%scan(%upcase(&base_ds),1)"
80 and upcase(memname)="%scan(%upcase(&base_ds),2)";
81%if &nvars=0 %then %do;
82 %put %str(WARN)ING: Dataset &base_ds has no variables, will not be converted.;
83 %return;
84%end;
85
86/* get indexes */
87proc sort
88 data=sashelp.vindex(
89 where=(upcase(libname)="%scan(%upcase(&base_ds),1)"
90 and upcase(memname)="%scan(%upcase(&base_ds),2)")
91 )
92 out=_data_;
93 by indxname indxpos;
94run;
95
96%local indexes;
97data _null_;
98 set &syslast end=last;
99 if _n_=1 then call symputx('indexes','(index=(','l');
100 by indxname indxpos;
101 length vars $32767 nom uni $8;
102 retain vars;
103 if first.indxname then do;
104 idxcnt+1;
105 nom='';
106 uni='';
107 vars=name;
108 end;
109 else vars=catx(' ',vars,name);
110 if last.indxname then do;
111 if nomiss='yes' then nom='/nomiss';
112 if unique='yes' then uni='/unique';
113 call symputx('indexes'
114 ,catx(' ',symget('indexes'),indxname,'=(',vars,')',nom,uni)
115 ,'l');
116 end;
117 if last then call symputx('indexes',cats(symget('indexes'),'))'),'l');
118run;
119
120
121data;run;
122%let setds=&syslast;
123proc sql
124%if %datatyp(&maxobs)=NUMERIC %then %do;
125 outobs=&maxobs;
126%end;
127 ;
128 create table &setds as select * from &base_ds
129%if &random_sample=YES %then %do;
130 order by ranuni(42)
131%end;
132 ;
133reset outobs=max;
134create table datalines1 as
135 select name,type,length,varnum,format,label from dictionary.columns
136 where upcase(libname)="%upcase(%scan(&base_ds,1))"
137 and upcase(memname)="%upcase(%scan(&base_ds,2))";
138
139/**
140 Due to long decimals cannot use best. format
141 So - use bestd. format and then use character functions to strip trailing
142 zeros, if NOT an integer!!
143 resolved code = ifc(int(VARIABLE)=VARIABLE
144 ,put(VARIABLE,best32.)
145 ,substrn(put(VARIABLE,bestd32.),1
146 ,findc(put(VARIABLE,bestd32.),'0','TBK')));
147**/
148
149data datalines_2;
150 format dataline $32000.;
151 set datalines1 (where=(upcase(name) not in
152 ('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM')));
153 if type='num' then dataline=
154 cats('ifc(int(',name,')=',name,'
155 ,put(',name,',best32.-l)
156 ,substrn(put(',name,',bestd32.-l),1
157 ,findc(put(',name,',bestd32.-l),"0","TBK")))');
158 /**
159 * binary data must be converted, to store in text format. It is identified
160 * by the presence of the $HEX keyword in the format.
161 */
162 else if upcase(format)=:'$HEX' then
163 dataline=cats('put(trim(',name,'),',format,')');
164 /**
165 * There is no easy way to store line breaks in a cards file.
166 * To discuss this, use: https://github.com/sasjs/core/issues/80
167 * Removing all nonprintables with kw (keep writeable)
168 */
169 else dataline=cats('compress(',name,', ,"kw")');
170run;
171
172proc sql noprint;
173select dataline into: datalines separated by ',' from datalines_2;
174
175%local
176 process_dttm_flg
177 valid_from_dttm_flg
178 valid_to_dttm_flg
179;
180%let process_dttm_flg = N;
181%let valid_from_dttm_flg = N;
182%let valid_to_dttm_flg = N;
183data _null_;
184 set datalines1 ;
185/* build attrib statement */
186 if type='char' then type2='$';
187 if strip(format) ne '' then format2=cats('format=',format);
188 if strip(label) ne '' then label2=cats('label=',quote(trim(label)));
189 str1=catx(' ',(put(name,$33.)||'length=')
190 ,put(cats(type2,length),$7.)||format2,label2);
191
192
193/* Build input statement */
194 if upcase(format)=:'$HEX' then type3=':'!!format;
195 else if type='char' then type3=':$char.';
196 str2=put(name,$33.)||type3;
197
198
199 if(upcase(name) = "PROCESSED_DTTM") then
200 call symputx("process_dttm_flg", "Y", "L");
201 if(upcase(name) = "VALID_FROM_DTTM") then
202 call symputx("valid_from_dttm_flg", "Y", "L");
203 if(upcase(name) = "VALID_TO_DTTM") then
204 call symputx("valid_to_dttm_flg", "Y", "L");
205
206
207 call symputx(cats("attrib_stmt_", put(_N_, 8.)), str1, "L");
208 call symputx(cats("input_stmt_", put(_N_, 8.))
209 , ifc(upcase(name) not in
210 ('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM'), str2, ""), "L");
211run;
212
213data _null_;
214 file &cards_file. &outencoding lrecl=32767 termstr=nl &append;
215 length __attrib $32767;
216 if _n_=1 then do;
217 put '/**';
218 put ' @file';
219 put " @brief Datalines for %upcase(%scan(&base_ds,2)) dataset";
220 put " @details Generated by %nrstr(%%)mp_ds2cards()";
221 put " Available on github.com/sasjs/core";
222 put '**/';
223 put "data &tgt_ds &indexes;";
224 put "attrib ";
225 %do i = 1 %to &nvars;
226 __attrib=symget("attrib_stmt_&i");
227 put __attrib;
228 %end;
229 put ";";
230
231 %if &process_dttm_flg. eq Y %then %do;
232 put 'retain PROCESSED_DTTM %sysfunc(datetime());';
233 %end;
234 %if &valid_from_dttm_flg. eq Y %then %do;
235 put 'retain VALID_FROM_DTTM &low_date;';
236 %end;
237 %if &valid_to_dttm_flg. eq Y %then %do;
238 put 'retain VALID_TO_DTTM &high_date;';
239 %end;
240 if __nobs=0 then do;
241 put 'call missing(of _all_);/* avoid uninitialised notes */';
242 put 'stop;';
243 put 'run;';
244 end;
245 else do;
246 put "infile cards dsd;";
247 put "input ";
248 %do i = 1 %to &nvars.;
249 %if(%length(&&input_stmt_&i..)) %then
250 put " &&input_stmt_&i..";
251 ;
252 %end;
253 put ";";
254 put "datalines4;";
255 end;
256 end;
257 set &setds end=__lastobs nobs=__nobs;
258/* remove all formats for write purposes - some have long underlying decimals */
259 format _numeric_ best30.29;
260 length __dataline $32767;
261 __dataline=catq('cqtmb',&datalines);
262 put __dataline;
263 if __lastobs then do;
264 put ';;;;';
265 put 'run;';
266 stop;
267 end;
268run;
269proc sql;
270 drop table &setds;
271quit;
272
273%if &showlog=YES %then %do;
274 data _null_;
275 infile &cards_file lrecl=32767;
276 input;
277 put _infile_;
278 run;
279%end;
280
281%put NOTE: CARDS FILE SAVED IN:;
282%put NOTE-;%put NOTE-;
283%put NOTE- %sysfunc(dequote(&cards_file.));
284%put NOTE-;%put NOTE-;
285%mend mp_ds2cards;