Macros for SAS Application Developers
https://github.com/sasjs/core
mp_filtercheck.sas
Go to the documentation of this file.
1 /**
2  @file
3  @brief Checks an input filter table for validity
4  @details Performs checks on the input table to ensure it arrives in the
5  correct format. This is necessary to prevent code injection. Will update
6  SYSCC to 1008 if bad records are found, and call mp_abort.sas for a
7  graceful service exit (configurable).
8 
9  Used for dynamic filtering in [Data Controller for SAS®](
10  https://datacontroller.io).
11 
12  Usage:
13 
14  %mp_filtercheck(work.filter,targetds=sashelp.class,outds=work.badrecords)
15 
16  The input table should have the following format:
17 
18  |GROUP_LOGIC:$3|SUBGROUP_LOGIC:$3|SUBGROUP_ID:8.|VARIABLE_NM:$32|OPERATOR_NM:$10|RAW_VALUE:$4000|
19  |---|---|---|---|---|---|
20  |AND|AND|1|AGE|=|12|
21  |AND|AND|1|SEX|<=|'M'|
22  |AND|OR|2|Name|NOT IN|('Jane','Alfred')|
23  |AND|OR|2|Weight|>=|7|
24 
25  Rules applied:
26 
27  @li GROUP_LOGIC - only AND/OR
28  @li SUBGROUP_LOGIC - only AND/OR
29  @li SUBGROUP_ID - only integers
30  @li VARIABLE_NM - must be in the target table
31  @li OPERATOR_NM - only =/>/</<=/>=/BETWEEN/IN/NOT IN/NE/CONTAINS
32  @li RAW_VALUE - no unquoted values except integers, commas and spaces.
33 
34  @returns The &outds table containing any bad rows, plus a REASON_CD column.
35 
36  @param [in] inds The table to be checked, with the format above
37  @param [in] targetds= The target dataset against which to verify VARIABLE_NM.
38  This must be available (ie, the library must be assigned).
39  @param [out] abort= (YES) If YES will call mp_abort.sas on any exceptions
40  @param [out] outds= (work.badrecords) The output table, which is a copy of the
41  &inds. table plus a REASON_CD column, containing only bad records.
42  If bad records are found, the SYSCC value will be set to 1008
43  (a general data problem).
44  Downstream processes should check this table (and return code) before
45  continuing.
46 
47  <h4> SAS Macros </h4>
48  @li mp_abort.sas
49  @li mf_getuniquefileref.sas
50  @li mf_getvarlist.sas
51  @li mf_getvartype.sas
52  @li mp_filtergenerate.sas
53  @li mp_filtervalidate.sas
54 
55  <h4> Related Macros </h4>
56  @li mp_filtergenerate.sas
57  @li mp_filtervalidate.sas
58 
59  @version 9.3
60  @author Allan Bowe
61 
62  @todo Support date / hex / name literals and exponents in RAW_VALUE field
63 **/
64 
65 %macro mp_filtercheck(inds,targetds=,outds=work.badrecords,abort=YES);
66 
67 %mp_abort(iftrue= (&syscc ne 0)
68  ,mac=&sysmacroname
69  ,msg=%str(syscc=&syscc - on macro entry)
70 )
71 
72 /* Validate input column */
73 %local vtype;
74 %let vtype=%mf_getvartype(&inds,RAW_VALUE);
75 %mp_abort(iftrue=(&abort=YES and &vtype ne C),
76  mac=&sysmacroname,
77  msg=%str(%str(ERR)OR: RAW_VALUE must be character)
78 )
79 %if &vtype ne C %then %do;
80  %put &sysmacroname: RAW_VALUE must be character;
81  %let syscc=42;
82  %return;
83 %end;
84 
85 
86 /**
87  * Sanitise the values based on valid value lists, then strip out
88  * quotes, commas, periods and spaces.
89  * Only numeric values should remain
90  */
91 %local reason_cd nobs;
92 %let nobs=0;
93 data &outds;
94  /*length GROUP_LOGIC SUBGROUP_LOGIC $3 SUBGROUP_ID 8 VARIABLE_NM $32
95  OPERATOR_NM $10 RAW_VALUE $4000;*/
96  set &inds;
97  length reason_cd $4032 vtype $1 vnum dsid 8 tmp $4000;
98  drop tmp;
99 
100  /* quick check to ensure column exists */
101  if upcase(VARIABLE_NM) not in
102  (%upcase(%mf_getvarlist(&targetds,dlm=%str(,),quote=SINGLE)))
103  then do;
104  REASON_CD="Variable "!!cats(variable_nm)!!" not in &targetds";
105  putlog REASON_CD= VARIABLE_NM=;
106  call symputx('reason_cd',reason_cd,'l');
107  call symputx('nobs',_n_,'l');
108  output;
109  return;
110  end;
111 
112  /* need to open the dataset to get the column type */
113  dsid=open("&targetds","i");
114  if dsid>0 then do;
115  vnum=varnum(dsid,VARIABLE_NM);
116  if vnum<1 then do;
117  /* should not happen as was also tested for above */
118  REASON_CD=cats("Variable (",VARIABLE_NM,") not found in &targetds");
119  putlog REASON_CD= dsid=;
120  call symputx('reason_cd',reason_cd,'l');
121  call symputx('nobs',_n_,'l');
122  output;
123  return;
124  end;
125  /* now we can get the type */
126  else vtype=vartype(dsid,vnum);
127  end;
128 
129  /* closed list checks */
130  if GROUP_LOGIC not in ('AND','OR') then do;
131  REASON_CD='GROUP_LOGIC should be AND/OR, not:'!!cats(GROUP_LOGIC);
132  putlog REASON_CD= GROUP_LOGIC=;
133  call symputx('reason_cd',reason_cd,'l');
134  call symputx('nobs',_n_,'l');
135  output;
136  end;
137  if SUBGROUP_LOGIC not in ('AND','OR') then do;
138  REASON_CD='SUBGROUP_LOGIC should be AND/OR, not:'!!cats(SUBGROUP_LOGIC);
139  putlog REASON_CD= SUBGROUP_LOGIC=;
140  call symputx('reason_cd',reason_cd,'l');
141  call symputx('nobs',_n_,'l');
142  output;
143  end;
144  if mod(SUBGROUP_ID,1) ne 0 then do;
145  REASON_CD='SUBGROUP_ID should be integer, not '!!cats(subgroup_id);
146  putlog REASON_CD= SUBGROUP_ID=;
147  call symputx('reason_cd',reason_cd,'l');
148  call symputx('nobs',_n_,'l');
149  output;
150  end;
151  if OPERATOR_NM not in
152  ('=','>','<','<=','>=','NE','GE','LE','BETWEEN','IN','NOT IN','CONTAINS')
153  then do;
154  REASON_CD='Invalid OPERATOR_NM: '!!cats(OPERATOR_NM);
155  putlog REASON_CD= OPERATOR_NM=;
156  call symputx('reason_cd',reason_cd,'l');
157  call symputx('nobs',_n_,'l');
158  output;
159  end;
160 
161  /* special missing logic */
162  if vtype='N'
163  and OPERATOR_NM in ('=','>','<','<=','>=','NE','GE','LE')
164  and cats(upcase(raw_value)) in (
165  '.','.A','.B','.C','.D','.E','.F','.G','.H','.I','.J','.K','.L','.M','.N'
166  '.N','.O','.P','.Q','.R','.S','.T','.U','.V','.W','.X','.Y','.Z','._'
167  )
168  then do;
169  /* valid numeric - exit data step loop */
170  return;
171  end;
172 
173  /* special logic */
174  if OPERATOR_NM in ('IN','NOT IN','BETWEEN') then do;
175  if OPERATOR_NM='BETWEEN' then raw_value1=tranwrd(raw_value,' AND ',',');
176  else do;
177  if substr(raw_value,1,1) ne '('
178  or substr(cats(reverse(raw_value)),1,1) ne ')'
179  then do;
180  REASON_CD='Missing start/end bracket in RAW_VALUE';
181  putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
182  call symputx('reason_cd',reason_cd,'l');
183  call symputx('nobs',_n_,'l');
184  output;
185  end;
186  else raw_value1=substr(raw_value,2,max(length(raw_value)-2,0));
187  end;
188  /* we now have a comma seperated list of values */
189  if vtype='N' then do i=1 to countc(raw_value1, ',')+1;
190  tmp=scan(raw_value1,i,',');
191  if cats(tmp) ne '.' and input(tmp, ?? 8.) eq . then do;
192  REASON_CD='Non Numeric value provided';
193  putlog REASON_CD= OPERATOR_NM= raw_value= raw_value1= ;
194  call symputx('reason_cd',reason_cd,'l');
195  call symputx('nobs',_n_,'l');
196  output;
197  end;
198  return;
199  end;
200  end;
201  else raw_value1=raw_value;
202 
203  /* remove nested literals eg '' */
204  raw_value1=tranwrd(raw_value1,"''",'');
205 
206  /* now match string literals (always single quotes) */
207  raw_value2=raw_value1;
208  regex = prxparse("s/(\').*?(\')//");
209  call prxchange(regex,-1,raw_value2);
210 
211  /* remove commas and periods*/
212  raw_value3=compress(raw_value2,',.');
213 
214  /* output records that contain values other than digits and spaces */
215  if notdigit(compress(raw_value3,' '))>0 then do;
216  putlog raw_value3= $hex32.;
217  REASON_CD=cats('Invalid RAW_VALUE:',raw_value);
218  putlog REASON_CD= raw_value= raw_value1= raw_value2= raw_value3=;
219  call symputx('reason_cd',reason_cd,'l');
220  call symputx('nobs',_n_,'l');
221  output;
222  end;
223 
224 run;
225 
226 
227 data _null_;
228  set &outds end=last;
229  putlog (_all_)(=);
230 run;
231 
232 %mp_abort(iftrue=(&abort=YES and &nobs>0),
233  mac=&sysmacroname,
234  msg=%str(Data issue: %superq(reason_cd))
235 )
236 
237 %if &nobs>0 %then %do;
238  %let syscc=1008;
239  %return;
240 %end;
241 
242 /**
243  * syntax checking passed but it does not mean the filter is valid
244  * for that we can run a proc sql validate query
245  */
246 %local fref1;
247 %let fref1=%mf_getuniquefileref();
248 %mp_filtergenerate(&inds,outref=&fref1)
249 
250 /* this macro will also set syscc to 1008 if any issues found */
251 %mp_filtervalidate(&fref1,&targetds,outds=&outds,abort=&abort)
252 
253 %mend mp_filtercheck;