Macros for SAS Application Developers
https://github.com/sasjs/core
mp_filtercheck.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Checks an input filter table for validity
4 @details Performs checks on the input table to ensure it arrives in the
5 correct format. This is necessary to prevent code injection. Will update
6 SYSCC to 1008 if bad records are found, and call mp_abort.sas for a
7 graceful service exit (configurable).
8
9 Used for dynamic filtering in [Data Controller for SAS®](
10 https://datacontroller.io).
11
12 Usage:
13
14 %mp_filtercheck(work.filter,targetds=sashelp.class,outds=work.badrecords)
15
16 The input table should have the following format:
17
18 |GROUP_LOGIC:$3|SUBGROUP_LOGIC:$3|SUBGROUP_ID:8.|VARIABLE_NM:$32|OPERATOR_NM:$10|RAW_VALUE:$4000|
19 |---|---|---|---|---|---|
20 |AND|AND|1|AGE|=|12|
21 |AND|AND|1|SEX|<=|'M'|
22 |AND|OR|2|Name|NOT IN|('Jane','Alfred')|
23 |AND|OR|2|Weight|>=|7|
24
25 Rules applied:
26
27 @li GROUP_LOGIC - only AND/OR
28 @li SUBGROUP_LOGIC - only AND/OR
29 @li SUBGROUP_ID - only integers
30 @li VARIABLE_NM - must be in the target table
31 @li OPERATOR_NM - only =/>/</<=/>=/BETWEEN/IN/NOT IN/NE/CONTAINS
32 @li RAW_VALUE - no unquoted values except integers, commas and spaces.
33
34 @returns The &outds table containing any bad rows, plus a REASON_CD column.
35
36 @param [in] inds The table to be checked, with the format above
37 @param [in] targetds= The target dataset against which to verify VARIABLE_NM.
38 This must be available (ie, the library must be assigned).
39 @param [out] abort= (YES) If YES will call mp_abort.sas on any exceptions
40 @param [out] outds= The output table, which is a copy of the &inds. table
41 plus a REASON_CD column, containing only bad records. If bad records found,
42 the SYSCC value will be set to 1008 (general data problem). Downstream
43 processes should check this table (and return code) before continuing.
44
45 <h4> SAS Macros </h4>
46 @li mp_abort.sas
47 @li mf_getuniquefileref.sas
48 @li mf_getvarlist.sas
49 @li mf_getvartype.sas
50 @li mp_filtergenerate.sas
51 @li mp_filtervalidate.sas
52
53 <h4> Related Macros </h4>
54 @li mp_filtergenerate.sas
55 @li mp_filtervalidate.sas
56
57 @version 9.3
58 @author Allan Bowe
59
60 @todo Support date / hex / name literals and exponents in RAW_VALUE field
61**/
62
63%macro mp_filtercheck(inds,targetds=,outds=work.badrecords,abort=YES);
64
65%mp_abort(iftrue= (&syscc ne 0)
66 ,mac=&sysmacroname
67 ,msg=%str(syscc=&syscc - on macro entry)
68)
69
70/* Validate input column */
71%local vtype;
72%let vtype=%mf_getvartype(&inds,RAW_VALUE);
73%mp_abort(iftrue=(&abort=YES and &vtype ne C),
74 mac=&sysmacroname,
75 msg=%str(%str(ERR)OR: RAW_VALUE must be character)
76)
77%if &vtype ne C %then %do;
78 %put &sysmacroname: RAW_VALUE must be character;
79 %let syscc=42;
80 %return;
81%end;
82
83
84/**
85 * Sanitise the values based on valid value lists, then strip out
86 * quotes, commas, periods and spaces.
87 * Only numeric values should remain
88 */
89%local reason_cd nobs;
90%let nobs=0;
91data &outds;
92 /*length GROUP_LOGIC SUBGROUP_LOGIC $3 SUBGROUP_ID 8 VARIABLE_NM $32
93 OPERATOR_NM $10 RAW_VALUE $4000;*/
94 set &inds;
95 length reason_cd $4032 vtype $1 vnum dsid 8 tmp $4000;
96 drop tmp;
97
98 /* quick check to ensure column exists */
99 if upcase(VARIABLE_NM) not in
100 (%upcase(%mf_getvarlist(&targetds,dlm=%str(,),quote=SINGLE)))
101 then do;
102 REASON_CD="Variable "!!cats(variable_nm)!!" not in &targetds";
103 putlog REASON_CD= VARIABLE_NM=;
104 call symputx('reason_cd',reason_cd,'l');
105 call symputx('nobs',_n_,'l');
106 output;
107 return;
108 end;
109
110 /* need to open the dataset to get the column type */
111 dsid=open("&targetds","i");
112 if dsid>0 then do;
113 vnum=varnum(dsid,VARIABLE_NM);
114 if vnum<1 then do;
115 /* should not happen as was also tested for above */
116 REASON_CD=cats("Variable (",VARIABLE_NM,") not found in &targetds");
117 putlog REASON_CD= dsid=;
118 call symputx('reason_cd',reason_cd,'l');
119 call symputx('nobs',_n_,'l');
120 output;
121 return;
122 end;
123 /* now we can get the type */
124 else vtype=vartype(dsid,vnum);
125 end;
126
127 /* closed list checks */
128 if GROUP_LOGIC not in ('AND','OR') then do;
129 REASON_CD='GROUP_LOGIC should be AND/OR, not:'!!cats(GROUP_LOGIC);
130 putlog REASON_CD= GROUP_LOGIC=;
131 call symputx('reason_cd',reason_cd,'l');
132 call symputx('nobs',_n_,'l');
133 output;
134 end;
135 if SUBGROUP_LOGIC not in ('AND','OR') then do;
136 REASON_CD='SUBGROUP_LOGIC should be AND/OR, not:'!!cats(SUBGROUP_LOGIC);
137 putlog REASON_CD= SUBGROUP_LOGIC=;
138 call symputx('reason_cd',reason_cd,'l');
139 call symputx('nobs',_n_,'l');
140 output;
141 end;
142 if mod(SUBGROUP_ID,1) ne 0 then do;
143 REASON_CD='SUBGROUP_ID should be integer, not '!!cats(subgroup_id);
144 putlog REASON_CD= SUBGROUP_ID=;
145 call symputx('reason_cd',reason_cd,'l');
146 call symputx('nobs',_n_,'l');
147 output;
148 end;
149 if OPERATOR_NM not in
150 ('=','>','<','<=','>=','NE','GE','LE','BETWEEN','IN','NOT IN','CONTAINS')
151 then do;
152 REASON_CD='Invalid OPERATOR_NM: '!!cats(OPERATOR_NM);
153 putlog REASON_CD= OPERATOR_NM=;
154 call symputx('reason_cd',reason_cd,'l');
155 call symputx('nobs',_n_,'l');
156 output;
157 end;
158
159 /* special missing logic */
160 if vtype='N'
161 and OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE')
162 and cats(upcase(raw_value)) in (
163 '.','.A','.B','.C','.D','.E','.F','.G','.H','.I','.J','.K','.L','.M','.N'
164 '.N','.O','.P','.Q','.R','.S','.T','.U','.V','.W','.X','.Y','.Z','._'
165 )
166 then do;
167 /* valid numeric - exit data step loop */
168 return;
169 end;
170
171 /* special logic */
172 if OPERATOR_NM in ('IN','NOT IN','BETWEEN') then do;
173 if OPERATOR_NM='BETWEEN' then raw_value1=tranwrd(raw_value,' AND ',',');
174 else do;
175 if substr(raw_value,1,1) ne '('
176 or substr(cats(reverse(raw_value)),1,1) ne ')'
177 then do;
178 REASON_CD='Missing start/end bracket in RAW_VALUE';
179 putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
180 call symputx('reason_cd',reason_cd,'l');
181 call symputx('nobs',_n_,'l');
182 output;
183 end;
184 else raw_value1=substr(raw_value,2,max(length(raw_value)-2,0));
185 end;
186 /* we now have a comma seperated list of values */
187 if vtype='N' then do i=1 to countc(raw_value1, ',')+1;
188 tmp=scan(raw_value1,i,',');
189 if cats(tmp) ne '.' and input(tmp, ?? 8.) eq . then do;
190 REASON_CD='Non Numeric value provided';
191 putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
192 call symputx('reason_cd',reason_cd,'l');
193 call symputx('nobs',_n_,'l');
194 output;
195 end;
196 return;
197 end;
198 end;
199 else raw_value1=raw_value;
200
201 /* remove nested literals eg '' */
202 raw_value1=tranwrd(raw_value1,"''",'');
203
204 /* now match string literals (always single quotes) */
205 raw_value2=raw_value1;
206 regex = prxparse("s/(\').*?(\')//");
207 call prxchange(regex,-1,raw_value2);
208
209 /* remove commas and periods*/
210 raw_value3=compress(raw_value2,',.');
211
212 /* output records that contain values other than digits and spaces */
213 if notdigit(compress(raw_value3,' '))>0 then do;
214 putlog raw_value3= $hex32.;
215 REASON_CD=cats('Invalid RAW_VALUE:',raw_value);
216 putlog REASON_CD= raw_value= raw_value1= raw_value2= raw_value3=;
217 call symputx('reason_cd',reason_cd,'l');
218 call symputx('nobs',_n_,'l');
219 output;
220 end;
221
222run;
223
224
225data _null_;
226 set &outds end=last;
227 putlog (_all_)(=);
228run;
229
230%mp_abort(iftrue=(&abort=YES and &nobs>0),
231 mac=&sysmacroname,
232 msg=%str(Data issue: %superq(reason_cd))
233)
234
235%if &nobs>0 %then %do;
236 %let syscc=1008;
237 %return;
238%end;
239
240/**
241 * syntax checking passed but it does not mean the filter is valid
242 * for that we can run a proc sql validate query
243 */
244%local fref1;
245%let fref1=%mf_getuniquefileref();
246%mp_filtergenerate(&inds,outref=&fref1)
247
248/* this macro will also set syscc to 1008 if any issues found */
249%mp_filtervalidate(&fref1,&targetds,outds=&outds,abort=&abort)
250
251%mend mp_filtercheck;