Macros for SAS Application Developers
https://github.com/sasjs/core
Loading...
Searching...
No Matches
mp_filtercheck.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Checks an input filter table for validity
4 @details Performs checks on the input table to ensure it arrives in the
5 correct format. This is necessary to prevent code injection. Will update
6 SYSCC to 1008 if bad records are found, and call mp_abort.sas for a
7 graceful service exit (configurable).
8
9 Used for dynamic filtering in [Data Controller for SAS®](
10 https://datacontroller.io).
11
12 Usage:
13
14 %mp_filtercheck(work.filter,targetds=sashelp.class,outds=work.badrecords)
15
16 The input table should have the following format:
17
18 |GROUP_LOGIC:$3|SUBGROUP_LOGIC:$3|SUBGROUP_ID:8.|VARIABLE_NM:$32|OPERATOR_NM:$10|RAW_VALUE:$4000|
19 |---|---|---|---|---|---|
20 |AND|AND|1|AGE|=|12|
21 |AND|AND|1|SEX|<=|'M'|
22 |AND|OR|2|Name|NOT IN|('Jane','Alfred')|
23 |AND|OR|2|Weight|>=|7|
24
25 Rules applied:
26
27 @li GROUP_LOGIC - only AND/OR
28 @li SUBGROUP_LOGIC - only AND/OR
29 @li SUBGROUP_ID - only integers
30 @li VARIABLE_NM - must be in the target table
31 @li OPERATOR_NM - only =/>/</<=/>=/BETWEEN/IN/NOT IN/NE/CONTAINS
32 @li RAW_VALUE - no unquoted values except integers, commas and spaces.
33
34 @returns The &outds table containing any bad rows, plus a REASON_CD column.
35
36 @param [in] inds The table to be checked, with the format above
37 @param [in] targetds= The target dataset against which to verify VARIABLE_NM.
38 This must be available (ie, the library must be assigned).
39 @param [out] abort= (YES) If YES will call mp_abort.sas on any exceptions
40 @param [out] outds= (work.badrecords) The output table, which is a copy of the
41 &inds. table plus a REASON_CD column, containing only bad records.
42 If bad records are found, the SYSCC value will be set to 1008
43 (a general data problem).
44 Downstream processes should check this table (and return code) before
45 continuing.
46
47 <h4> SAS Macros </h4>
48 @li mp_abort.sas
49 @li mf_getuniquefileref.sas
50 @li mf_getvarlist.sas
51 @li mf_getvartype.sas
52 @li mp_filtergenerate.sas
53 @li mp_filtervalidate.sas
54
55 <h4> Related Macros </h4>
56 @li mp_filtergenerate.sas
57 @li mp_filtervalidate.sas
58
59 @version 9.3
60 @author Allan Bowe
61
62 @todo Support date / hex / name literals and exponents in RAW_VALUE field
63**/
64
65%macro mp_filtercheck(inds,targetds=,outds=work.badrecords,abort=YES);
66
67%mp_abort(iftrue= (&syscc ne 0)
68 ,mac=&sysmacroname
69 ,msg=%str(syscc=&syscc - on macro entry)
70)
71
72/* Validate input column */
73%local vtype;
74%let vtype=%mf_getvartype(&inds,RAW_VALUE);
75%mp_abort(iftrue=(&abort=YES and &vtype ne C),
76 mac=&sysmacroname,
77 msg=%str(%str(ERR)OR: RAW_VALUE must be character)
78)
79%if &vtype ne C %then %do;
80 %put &sysmacroname: RAW_VALUE must be character;
81 %let syscc=42;
82 %return;
83%end;
84
85
86/**
87 * Sanitise the values based on valid value lists, then strip out
88 * quotes, commas, periods and spaces.
89 */
90%local reason_cd nobs;
91%let nobs=0;
92data &outds;
93 /*length GROUP_LOGIC SUBGROUP_LOGIC $3 SUBGROUP_ID 8 VARIABLE_NM $32
94 OPERATOR_NM $10 RAW_VALUE $4000;*/
95 set &inds end=last;
96 length reason_cd $4032 vtype vtype2 $1 vnum dsid 8 tmp $4000;
97 drop tmp;
98
99 /* quick check to ensure column exists */
100 if upcase(VARIABLE_NM) not in
101 (%upcase(%mf_getvarlist(&targetds,dlm=%str(,),quote=SINGLE)))
102 then do;
103 REASON_CD="Variable "!!cats(variable_nm)!!" not in &targetds";
104 putlog REASON_CD= VARIABLE_NM=;
105 call symputx('reason_cd',reason_cd,'l');
106 call symputx('nobs',_n_,'l');
107 output;
108 return;
109 end;
110
111 /* need to open the dataset to get the column type */
112 retain dsid;
113 if _n_=1 then dsid=open("&targetds","i");
114 if dsid>0 then do;
115 vnum=varnum(dsid,VARIABLE_NM);
116 if vnum<1 then do;
117 /* should not happen as was also tested for above */
118 REASON_CD=cats("Variable (",VARIABLE_NM,") not found in &targetds");
119 putlog REASON_CD= dsid=;
120 call symputx('reason_cd',reason_cd,'l');
121 call symputx('nobs',_n_,'l');
122 output;
123 goto endstep;
124 end;
125 /* now we can get the type */
126 else vtype=vartype(dsid,vnum);
127 end;
128 else do;
129 REASON_CD=cats("Could not open &targetds");
130 putlog REASON_CD= dsid=;
131 call symputx('reason_cd',reason_cd,'l');
132 call symputx('nobs',_n_,'l');
133 output;
134 stop;
135 end;
136
137 /* closed list checks */
138 if GROUP_LOGIC not in ('AND','OR') then do;
139 REASON_CD='GROUP_LOGIC should be AND/OR, not:'!!cats(GROUP_LOGIC);
140 putlog REASON_CD= GROUP_LOGIC=;
141 call symputx('reason_cd',reason_cd,'l');
142 call symputx('nobs',_n_,'l');
143 output;
144 end;
145 if SUBGROUP_LOGIC not in ('AND','OR') then do;
146 REASON_CD='SUBGROUP_LOGIC should be AND/OR, not:'!!cats(SUBGROUP_LOGIC);
147 putlog REASON_CD= SUBGROUP_LOGIC=;
148 call symputx('reason_cd',reason_cd,'l');
149 call symputx('nobs',_n_,'l');
150 output;
151 end;
152 if mod(SUBGROUP_ID,1) ne 0 then do;
153 REASON_CD='SUBGROUP_ID should be integer, not '!!cats(subgroup_id);
154 putlog REASON_CD= SUBGROUP_ID=;
155 call symputx('reason_cd',reason_cd,'l');
156 call symputx('nobs',_n_,'l');
157 output;
158 end;
159 if OPERATOR_NM not in
160 ('=','>','<','<=','>=','NE','GE','LE','BETWEEN','IN','NOT IN','CONTAINS')
161 then do;
162 REASON_CD='Invalid OPERATOR_NM: '!!cats(OPERATOR_NM);
163 putlog REASON_CD= OPERATOR_NM=;
164 call symputx('reason_cd',reason_cd,'l');
165 call symputx('nobs',_n_,'l');
166 output;
167 end;
168
169 /* special missing logic */
170 if vtype='N' & OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE') then do;
171 if cats(upcase(raw_value)) in (
172 '.','.A','.B','.C','.D','.E','.F','.G','.H','.I','.J','.K','.L','.M','.N'
173 '.N','.O','.P','.Q','.R','.S','.T','.U','.V','.W','.X','.Y','.Z','._'
174 )
175 then do;
176 /* valid numeric - exit data step loop */
177 return;
178 end;
179 else if subpad(upcase(raw_value),1,1) in (
180 'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
181 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
182 )
183 then do;
184 /* check if the raw_value contains a valid variable NAME */
185 vnum=varnum(dsid,subpad(raw_value,1,32));
186 if vnum>0 then do;
187 /* now we can get the type */
188 vtype2=vartype(dsid,vnum);
189 /* check type matches */
190 if vtype2=vtype then do;
191 /* valid target var - exit loop */
192 return;
193 end;
194 else do;
195 REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
196 putlog REASON_CD= dsid=;
197 call symputx('reason_cd',reason_cd,'l');
198 call symputx('nobs',_n_,'l');
199 output;
200 goto endstep;
201 end;
202 end;
203 end;
204 end;
205
206 /* special logic */
207 if OPERATOR_NM in ('IN','NOT IN','BETWEEN') then do;
208 if OPERATOR_NM='BETWEEN' then raw_value1=tranwrd(raw_value,' AND ',',');
209 else do;
210 if substr(raw_value,1,1) ne '('
211 or substr(cats(reverse(raw_value)),1,1) ne ')'
212 then do;
213 REASON_CD='Missing start/end bracket in RAW_VALUE';
214 putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
215 call symputx('reason_cd',reason_cd,'l');
216 call symputx('nobs',_n_,'l');
217 output;
218 end;
219 else raw_value1=substr(raw_value,2,max(length(raw_value)-2,0));
220 end;
221 /* we now have a comma seperated list of values */
222 if vtype='N' then do i=1 to countc(raw_value1, ',')+1;
223 tmp=scan(raw_value1,i,',');
224 if cats(tmp) ne '.' and input(tmp, ?? 8.) eq . then do;
225 if OPERATOR_NM ='BETWEEN' and subpad(upcase(tmp),1,1) in (
226 'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
227 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
228 )
229 then do;
230 /* check if the raw_value contains a valid variable NAME */
231 /* is not valid syntax for IN or NOT IN */
232 vnum=varnum(dsid,subpad(tmp,1,32));
233 if vnum>0 then do;
234 /* now we can get the type */
235 vtype2=vartype(dsid,vnum);
236 /* check type matches */
237 if vtype2=vtype then do;
238 /* valid target var - exit loop */
239 return;
240 end;
241 else do;
242 REASON_CD=cats("Compared Type (",vtype2,") is not (",vtype,")");
243 putlog REASON_CD= dsid=;
244 call symputx('reason_cd',reason_cd,'l');
245 call symputx('nobs',_n_,'l');
246 output;
247 goto endstep;
248 end;
249 end;
250 end;
251 REASON_CD='Non Numeric value provided';
252 putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
253 call symputx('reason_cd',reason_cd,'l');
254 call symputx('nobs',_n_,'l');
255 output;
256 end;
257 return;
258 end;
259 end;
260 else raw_value1=raw_value;
261
262 /* remove nested literals eg '' */
263 raw_value1=tranwrd(raw_value1,"''",'');
264
265 /* now match string literals (always single quotes) */
266 raw_value2=raw_value1;
267 regex = prxparse("s/(\').*?(\')//");
268 call prxchange(regex,-1,raw_value2);
269
270 /* remove commas and periods*/
271 raw_value3=compress(raw_value2,',.');
272
273 /* output records that contain values other than digits and spaces */
274 if notdigit(compress(raw_value3,' '))>0 then do;
275 if vtype='C' and subpad(upcase(raw_value),1,1) in (
276 'A','B','C','D','E','F','G','H','I','J','K','L','M','N'
277 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_'
278 )
279 then do;
280 /* check if the raw_value contains a valid variable NAME */
281 vnum=varnum(dsid,subpad(raw_value,1,32));
282 if vnum>0 then do;
283 /* now we can get the type */
284 vtype2=vartype(dsid,vnum);
285 /* check type matches */
286 if vtype2=vtype then do;
287 /* valid target var - exit loop */
288 return;
289 end;
290 else do;
291 REASON_CD=cats("Compared Char Type (",vtype2,") is not (",vtype,")");
292 putlog REASON_CD= dsid=;
293 call symputx('reason_cd',reason_cd,'l');
294 call symputx('nobs',_n_,'l');
295 output;
296 goto endstep;
297 end;
298 end;
299 end;
300
301 putlog raw_value3= $hex32.;
302 REASON_CD=cats('Invalid RAW_VALUE:',raw_value);
303 putlog (_all_)(=);
304 call symputx('reason_cd',reason_cd,'l');
305 call symputx('nobs',_n_,'l');
306 output;
307 end;
308
309 endstep:
310 if last then rc=close(dsid);
311run;
312
313
314data _null_;
315 set &outds end=last;
316 putlog (_all_)(=);
317run;
318
319%mp_abort(iftrue=(&abort=YES and &nobs>0),
320 mac=&sysmacroname,
321 msg=%str(Data issue: %superq(reason_cd))
322)
323
324%if &nobs>0 %then %do;
325 %let syscc=1008;
326 %return;
327%end;
328
329/**
330 * syntax checking passed but it does not mean the filter is valid
331 * for that we can run a proc sql validate query
332 */
333%local fref1;
334%let fref1=%mf_getuniquefileref();
335%mp_filtergenerate(&inds,outref=&fref1)
336
337/* this macro will also set syscc to 1008 if any issues found */
338%mp_filtervalidate(&fref1,&targetds,outds=&outds,abort=&abort)
339
340%mend mp_filtercheck;