Macros for SAS Application Developers
https://github.com/sasjs/core
mp_chop.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Splits a file of ANY SIZE by reference to a search string.
4 @details Provide a fileref and a search string to chop off part of a file.
5
6 Works by reading in the file byte by byte, then marking the beginning and end
7 of each matched string, before finally doing the chop.
8
9 Choose whether to keep the FIRST or the LAST section of the file. Optionally,
10 use an OFFSET to fix the precise chop point.
11
12 Usage:
13
14 %let src="%sysfunc(pathname(work))/file.txt";
15 %let str=Chop here!;
16 %let out1="%sysfunc(pathname(work))/file1.txt";
17 %let out2="%sysfunc(pathname(work))/file2.txt";
18 %let out3="%sysfunc(pathname(work))/file3.txt";
19 %let out4="%sysfunc(pathname(work))/file4.txt";
20
21 data _null_;
22 file &src;
23 put "startsection&str.endsection";
24 run;
25
26 %mp_chop(&src, matchvar=str, keep=FIRST, outfile=&out1)
27 %mp_chop(&src, matchvar=str, keep=LAST, outfile=&out2)
28 %mp_chop(&src, matchvar=str, keep=FIRST, matchpoint=END, outfile=&out3)
29 %mp_chop(&src, matchvar=str, keep=LAST, matchpoint=END, outfile=&out4)
30
31 filename results (&out1 &out2 &out3 &out4);
32 data _null_;
33 infile results;
34 input;
35 list;
36 run;
37
38 Results:
39 @li `startsection`
40 @li `Chop here!endsection`
41 @li `startsectionChop here!`
42 @li `endsection`
43
44 For more examples, see mp_chop.test.sas
45
46 @param [in] infile The QUOTED path to the file on which to perform the chop
47 @param [in] matchvar= Macro variable NAME containing the string to split by
48 @param [in] matchpoint= (START) Valid values:
49 @li START - chop at the beginning of the string in `matchvar`.
50 @li END - chop at the end of the string in `matchvar`.
51 @param [in] offset= (0) An adjustment to the precise chop location, by
52 reference to the `matchpoint`. Should be a positive or negative integer.
53 @param [in] keep= (FIRST) Valid values:
54 @li FIRST - keep the section of the file before the chop
55 @li LAST - keep the section of the file after the chop
56 @param [in] mdebug= (0) Set to 1 to provide macro debugging
57 @param [out] outfile= (0) Optional QUOTED path to the adjusted output file
58 (avoids overwriting the input file).
59
60 <h4> SAS Macros </h4>
61 @li mf_getuniquefileref.sas
62 @li mf_getuniquename.sas
63
64 <h4> Related Macros </h4>
65 @li mp_abort.sas
66 @li mp_gsubfile.sas
67 @li mp_replace.sas
68 @li mp_chop.test.sas
69
70 @version 9.4
71 @author Allan Bowe
72
73**/
74
%macro mp_chop(infile,
  matchvar=,
  matchpoint=START,
  keep=FIRST,
  offset=0,
  mdebug=0,
  outfile=0
)/*/STORE SOURCE*/;

/* every macro variable assigned below is declared local, so nothing leaks
  into (or overwrites a same-named variable in) the calling scope */
%local fref0 dttm ds1 ds2 split;
%let fref0=%mf_getuniquefileref();
%let ds1=%mf_getuniquename(prefix=allchars);
%let ds2=%mf_getuniquename(prefix=startmark);

/* default behaviour is to overwrite the input file */
%if &outfile=0 %then %let outfile=&infile;

%mp_abort(iftrue= (%length(%superq(&matchvar))=0)
  ,mac=mp_chop.sas
  ,msg=%str(&matchvar is an empty variable)
)

/* The comparisons further down are made against uppercase literals, so
  normalise the inputs and reject anything unexpected.  Without this, a value
  such as keep=last leaves the chop point missing and the output step writes
  either an empty file or the entire file without any warning. */
%let keep=%upcase(&keep);
%let matchpoint=%upcase(&matchpoint);

%mp_abort(iftrue= (x&keep ne xFIRST and x&keep ne xLAST)
  ,mac=mp_chop.sas
  ,msg=%str(Invalid value for keep: &keep - should be FIRST or LAST)
)
%mp_abort(iftrue= (x&matchpoint ne xSTART and x&matchpoint ne xEND)
  ,mac=mp_chop.sas
  ,msg=%str(Invalid value for matchpoint: &matchpoint - should be START or END)
)

/* START */
%let dttm=%sysfunc(datetime());

/* recfm=n reads the file as a byte stream, ignoring line endings */
filename &fref0 &infile lrecl=1 recfm=n;

/* create dataset with one char per row */
data &ds1;
  infile &fref0;
  input sourcechar $char1. @@;
  format sourcechar hex2.;
run;

/* get start & stop position of first matchvar string (one row, two vars) */
data &ds2;
  /* set find string to length in bytes to cover trailing spaces */
  length string $ %length(%superq(&matchvar));
  string =symget("&matchvar");

  firstchar=char(string,1);
  findlen=lengthm(string); /* <- for trailing bytes */

  /* _N_ is used as the direct-access pointer into the one-char-per-row table */
  do _N_=1 to nobs;
    set &ds1 nobs=nobs point=_N_;
    if sourcechar=firstchar then do;
      /* candidate found - compare the following bytes against the string,
        taking care not to read beyond the last row */
      pos=1;
      s=0;
      do point=_N_ to min(_N_ + findlen -1,nobs);
        set &ds1 point=point;
        if sourcechar=char(string, pos) then s + 1;
        else goto _leave_;
        pos+1;
      end;
      _leave_:
      if s=findlen then do;
        START =_N_;
        _N_ =_N_+ s - 1;
        STOP =_N_;
        output;
        /* matched! only the first occurrence is needed */
        stop;
      end;
    end;
  end;
  /* explicit stop is necessary as all reads use point= */
  stop;
  keep START STOP;
run;

/* SPLIT stays empty unless the step below processes a matched row.  A split
  value of 0 is a legitimate chop point (for instance keep=LAST where the
  match begins at byte 1) so 0 cannot also be used as the no-match flag. */
%let split=;
data _null_;
  set &ds2;
  if "&matchpoint"='START' then do;
    if "&keep"='FIRST' then mp=start;
    else if "&keep"='LAST' then mp=start-1;
  end;
  else if "&matchpoint"='END' then do;
    if "&keep"='FIRST' then mp=stop+1;
    else if "&keep"='LAST' then mp=stop;
  end;
  split=mp+&offset;
  call symputx('split',split,'l');
%if &mdebug=1 %then %do;
  put (_all_)(=);
  %put &=offset;
%end;
run;
%if %length(&split)=0 %then %do;
  %put &sysmacroname: No match found in &infile for string %superq(&matchvar);
  /* release the fileref before leaving, as the normal exit path does */
  %if &mdebug=0 %then %do;
    filename &fref0 clear;
  %end;
  %return;
%end;

/* write the retained bytes: FIRST keeps rows before SPLIT, LAST keeps rows
  after SPLIT */
data _null_;
  file &outfile recfm=n;
  set &ds1;
%if &keep=FIRST %then %do;
  if _n_ ge &split then stop;
%end;
%else %do;
  if _n_ gt &split;
%end;
  put sourcechar char1.;
run;

%if &mdebug=0 %then %do;
  filename &fref0 clear;
%end;
%else %do;
  /* debug mode: list the first records of the result in the log */
  data _null_;
    infile &outfile lrecl=32767;
    input;
    list;
    if _n_>50 then stop;
  run;
%end;
/* END */
%put &sysmacroname took %sysevalf(%sysfunc(datetime())-&dttm) seconds to run;

%mend mp_chop;