注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

数据挖掘

学习数据挖掘

 
 
 

日志

 
 

SAS-随机抽样&分层抽样  

2013-05-25 20:22:38|  分类: SAS |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |
利用uniform函数,随机抽样30个
/* Build a 100-row population; i is the row identifier. */
data all;
  do i = 1 to 100;
    output;
  end;
run;

/* Attach a uniform random key to every row. */
data smp;
  set all;
  rdm = uniform(0);
run;

/* Ordering by the random key shuffles the rows. */
proc sort data=smp out=out_smp;
  by rdm;
run;

/* Keep only the first 30 rows of the shuffled data. */
data out_smp;
  set out_smp;
  if _n_ > 30 then delete;
run;

/* Return the sample to identifier order. */
proc sort data=out_smp;
  by i;
run;

-------利用抽样过程,进行抽样

PROC SURVEYSELECT一般形式:

   Proc surveyselect data=<原数据集>

            Method=<srs|sys|urs|...>  /*抽样方法选择*/

            out=<抽取样本存放的数据集>

n=<抽取数量>(or samprate=抽样比例)

seed=n;

            strata <指定分层变量>;

            id <指定抽取的样本所保留的源数据集变量>;

run;

举例:建立含1000个数据的数据集a:data a ;do i = 1 to 1000 ;output ;end ;run ;

1、简单无重复随机抽样,从中抽取100个数据:

/* Simple random sample of 100 rows from a, written to b. */
proc surveyselect data=a
                  out=b
                  method=srs
                  n=100
                  seed=25070419;
run;

2、简单随机抽样,按10%的比例抽取:

/* Simple random sample of 10% of the rows of a, written to b. */
proc surveyselect data=a
                  out=b
                  method=srs
                  samprate=0.1
                  seed=25070419;
run;

3、系统抽样:在 1000 个数据中每隔 10 个抽取 1 个:

/* Systematic sample of 100 rows from a, written to b. */
proc surveyselect data=a
                  out=b
                  method=sys
                  sampsize=100
                  seed=25070416;
run;

4、分层等比例随机抽样举例

/* Sort the population by the stratification variable first:
   PROC SURVEYSELECT expects STRATA input in sorted order. */
proc sort data=test2;
  by 分层变量;
run;

/* Proportional stratified sampling: draw 10% from every stratum. */
proc surveyselect data=test2 out=results2 method=srs samprate=0.1;
  strata 分层变量;
run;

5、分层不等比例抽样举例;

1)手工设置抽样比例或者抽样数

/* Sort the population by the stratification variable first:
   PROC SURVEYSELECT expects STRATA input in sorted order. */
proc sort data=test3;
  by 分层变量;
run;

/* Disproportionate stratified sampling by rate: the list supplies one
   sampling rate per stratum. */
proc surveyselect data=test3 out=results3 method=srs
                  samprate=(0.1,0.3,0.5,0.2);
  strata 分层变量;
run;

/* Disproportionate stratified sampling by count: the list supplies one
   sample size per stratum. */
proc surveyselect data=test3
                  out=results3
                  method=srs
                  sampsize=(30,20,50,40);
  strata 分层变量;
run;

2)根据抽样表进行不等比例抽样

/* Sort the population by the stratification variable first:
   PROC SURVEYSELECT expects STRATA input in sorted order. */
proc sort data=test3;
  by 分层变量;
run;

/* Disproportionate stratified sampling driven by an allocation table.
   samp_table must contain the stratification variable together with the
   per-stratum allocation: the variable must be named _rate_ when
   sampling by rate, or _nsize_ when sampling by count. */
proc surveyselect data=test3 out=results3 method=SRS
                  samprate=samp_table;
  strata 分层变量;
run;

 

----------------------利用data步进行抽样

/* Number of rows to draw. */
%let samplesize=100;

/* One-pass selection sampling without replacement.
   Each row is kept with probability
       (rows still needed) / (rows not yet examined, including this one)
   which yields exactly &samplesize rows (or every row when the input is
   smaller) with equal inclusion probability for all rows. */
data tmp;
  set sashelp.prdsale nobs=nobs;
  retain _cnt_ 0;                     /* rows selected so far */

  /* nobs - _n_ + 1 is the number of rows left to examine; the row
     counter _n_, not the selected count, must drive the denominator. */
  if _cnt_ < &samplesize
     and ranuni(0) * (nobs - _n_ + 1) < (&samplesize - _cnt_) then do;
    _cnt_ + 1;
    output;
  end;

  drop _cnt_;
run;

/* 随机抽样Macro */

/* Library that holds the stored compiled macro catalog. */
libname survey "E:\bookdata\chapt12";

/* Enable stored compiled macros in SURVEY and trace macro execution
   in the log. */
options mstored sasmstore=survey
        mprint mlogic;

/*---------------------------------------------------------------------
  %Survey - wrapper around PROC SURVEYSELECT for unstratified and
  stratified random sampling. Compiled as a stored macro (/STORE).

  Keyword parameters:
    Input=    data set to sample from. For stratified runs a sorted copy
              named sort_<Input> is created, so a one-level name is
              expected.
    Method=   value passed to METHOD= (for example srs or urs).
    outhits=  pass the keyword outhits to add the OUTHITS option, or
              leave empty.
    Reps=     value passed to REPS=; the option is omitted when empty.
    Strata=   empty for unstratified sampling, Strata for stratified.
    Variable= stratification variable(s); required when Strata=Strata.
    Nr=       N to sample by count (value taken from N=),
              R to sample by rate (value taken from Rate=).
    N=        sample size: a number, a parenthesised list such as (4,5),
              or, for stratified runs, a data set with variable _NSIZE_.
    Rate=     sampling rate: a number, a parenthesised list such as
              (0.4,0.5), or, for stratified runs, a data set with
              variable _RATE_.
    Output=   data set that receives the sample.

  Any validation failure writes an ERROR line to the log and returns
  without running PROC SURVEYSELECT.
---------------------------------------------------------------------*/
%macro Survey(Input=,Method=,outhits=,Reps=,Strata=,Variable=,Nr=,N=,Rate=,Output=)/Store;

  %local size_opt size_val size_var src dsid varpos rc;

  /* The data set to sample from must exist. */
  %if %sysfunc(exist(&Input))=0 %then %do;
    %put ERROR: &Input data set does not exist;
    %return;
  %end;

  /* Strata is either empty or the keyword Strata. */
  %if %length(&Strata)>0 %then %do;
    %if %upcase(&Strata) ne STRATA %then %do;
      %put ERROR: the value &Strata is only Strata or null!;
      %return;
    %end;
  %end;

  /* Translate Nr into the PROC option, the value to pass and the
     variable an allocation data set must carry. */
  %if %upcase(&Nr)=N %then %do;
    %let size_opt=sampsize;
    %let size_val=&N;
    %let size_var=_NSIZE_;
  %end;
  %else %if %upcase(&Nr)=R %then %do;
    %let size_opt=samprate;
    %let size_val=&Rate;
    %let size_var=_RATE_;
  %end;
  %else %do;
    %put ERROR: the value &Nr is only N or R!;
    %return;
  %end;

  /* %superq keeps the commas of a list such as (4,5) from being read
     as argument separators. */
  %if %length(%superq(size_val))=0 %then %do;
    %put ERROR: no sample size or rate was supplied for Nr=&Nr;
    %return;
  %end;

  %let src=&Input;

  %if %length(&Strata)>0 %then %do;

    %if %length(&Variable)=0 %then %do;
      %put ERROR: Variable= is required when Strata=Strata;
      %return;
    %end;

    /* A value that does not start with "(" names an allocation data
       set; make sure it opens and carries the expected variable. */
    %if %qsubstr(%superq(size_val),1,1) ne %str(%() %then %do;
      %let dsid=%sysfunc(open(&size_val));
      %if &dsid le 0 %then %do;
        %put ERROR: &size_val does not exist;
        %return;
      %end;
      %let varpos=%sysfunc(varnum(&dsid,&size_var));
      %let rc=%sysfunc(close(&dsid));
      %if &varpos le 0 %then %do;
        %put ERROR: &size_val does not contain the variable &size_var!;
        %return;
      %end;
    %end;

    /* STRATA processing needs the input sorted by the strata
       variables; sort into a copy so &Input itself is not reordered. */
    proc sort data=&Input out=sort_&Input;
      by &Variable;
    run;

    %let src=sort_&Input;
  %end;

  proc surveyselect data=&src noprint
    method=&Method
    &size_opt=&size_val
    out=&Output
    &outhits
    %if %length(&Reps)>0 %then %do;
      reps=&Reps
    %end;
    ;
    %if %length(&Strata)>0 %then %do;
      strata &Variable;
    %end;
  run;

%mend Survey;


/* Library that holds the stored compiled macro catalog. */
libname survey "E:\bookdata\chapt12";

/* Enable stored compiled macros in SURVEY and trace macro execution
   in the log. */
options mstored sasmstore=survey
        mprint mlogic;

/* Copy of sashelp.class in which name is replaced by its first three
   characters. */
data class1(rename=(temp=name) drop=name);
  set sashelp.class;
  temp = substr(name, 1, 3);
run;

/* Stack the original rows on top of the shortened-name copies. */
data class;
  set sashelp.class
      class1;
run;

/* Demo population: class plus a row number n and an age band age_cde. */
data basic;
  set class;
  n = _n_;
  length age_cde $20;

  /* Contiguous bands so every age lands in the band its label names. */
  if age <= 12 then age_cde = '<=12';
  else if age <= 14 then age_cde = '13-14';
  else age_cde = '>=15';
run;

/* Order by the variables later used as strata. */
proc sort data=basic;
  by sex age_cde;
run;


/* Store the row count of basic in macro variable nobs.
   _null_ keeps the step from creating an output data set, and the
   never-executed SET still fills nobs at compile time, so this also
   works when basic is empty. */
data _null_;
  if 0 then set basic nobs=nobs;
  call symputx('nobs', nobs);   /* symputx stores the number without padding */
  stop;
run;

%put &nobs;


/* Per sex/age_cde group: row count (_Nsize_) and its share of the
   &nobs total (_RAte_). */
proc sql;
  create table para as
    select sex,
           age_cde,
           count(*)         as _Nsize_,
           count(*) / &nobs as _RAte_
      from basic
      group by sex, age_cde;
quit;

/* Unstratified, method srs, size 4, one replicate. */
%survey(
  Input=basic,
  Method=srs,
  outhits=,
  Reps=1,
  Strata=,
  Variable=,
  Nr=N,
  N=4,
  Rate=,
  Output=test
);

/* Unstratified, method urs with outhits, size 4, two replicates. */
%survey(
  Input=basic,
  Method=urs,
  outhits=outhits,
  Reps=2,
  Strata=,
  Variable=,
  Nr=N,
  N=4,
  Rate=,
  Output=test
);

/* Stratified by sex, sizes given as the list (4,5). */
%survey(
  Input=basic,
  Method=urs,
  outhits=outhits,
  Reps=2,
  Strata=Strata,
  Variable=sex,
  Nr=N,
  N=(4,5),
  Rate=,
  Output=test
);

/* Stratified by sex, rates given as the list (0.4,0.5). */
%survey(
  Input=basic,
  Method=urs,
  outhits=outhits,
  Reps=2,
  Strata=Strata,
  Variable=sex,
  Nr=R,
  N=,
  Rate=(0.4,0.5),
  Output=test
);

/* Stratified by sex and age_cde, sizes taken from data set para. */
%survey(
  Input=basic,
  Method=urs,
  outhits=outhits,
  Reps=2,
  Strata=Strata,
  Variable=sex age_cde,
  Nr=N,
  N=para,
  Rate=,
  Output=test
);


来自:http://yanyk.dxyer.cn/8215_10/

       《SAS编程与数据挖掘商业案例》

  评论这张
 
阅读(713)| 评论(0)
推荐 转载

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017