# Multi-dimensional decision stump: pick the single feature / threshold / sign
# that minimises the training error on hw2_train.dat, then report the error of
# that hypothesis on hw2_test.dat.

import math
import random

import numpy as np

# matplotlib and requests are only needed for plotting / downloading.  They are
# imported defensively so the numerical helpers stay importable (and testable)
# on machines where those packages are not installed.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    import requests
except ImportError:
    requests = None


def sign(x):
    """Return +1 when ``x`` is strictly positive, otherwise -1 (so sign(0) == -1)."""
    return 1 if x > 0 else -1


def stump_decision(D, N, dim):
    """Find the best decision stump ``h(x) = s * sign(x[dim] - theta)`` on ``D``.

    Args:
        D: sequence of ``(x, y)`` pairs; ``x`` is indexable by ``dim`` and
            ``y`` is the label, +1 or -1.
        N: number of examples to scan; callers pass ``len(D)``.
        dim: index of the feature the stump thresholds on.

    Returns:
        ``(s, theta, err)`` where ``err`` is the number of training examples
        the returned hypothesis misclassifies.  ``theta`` is ``-inf`` when the
        best stump is the constant classifier ``h(x) = s``.
    """
    D = sorted(D, key=lambda d: d[0][dim])

    # With the threshold below every point, s=+1 predicts all +1 and s=-1
    # predicts all -1; these are the starting error counts for the two scans.
    err_all_pos = sum(1 for d in D if d[1] != 1)
    err_all_neg = N - err_all_pos

    best = None
    for s, err in ((1, err_all_pos), (-1, err_all_neg)):
        # Candidate 0: threshold below all points.  -inf makes the stored
        # hypothesis exactly the constant classifier whose error was counted.
        if best is None or err < best[2]:
            best = [s, float("-inf"), err]
        for k in range(1, N):
            # Moving the cut past point k-1 flips its prediction from s to -s:
            # one more mistake if its label is s, one fewer otherwise.
            err += 1 if D[k - 1][1] == s else -1
            lo, hi = D[k - 1][0][dim], D[k][0][dim]
            if lo == hi:
                # No threshold separates two equal values, so this running
                # count does not correspond to any real hypothesis.
                continue
            if err < best[2]:
                best = [s, (lo + hi) / 2, err]
    return tuple(best)


def parse_data(raw):
    """Parse whitespace-separated text into ``[(features, label), ...]``.

    Every non-blank line holds the feature values followed by an integer
    label.  Features are returned as a 1-D float ``numpy`` array.
    """
    D = []
    for line in raw.splitlines():
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines
        features = np.array([float(v) for v in fields[:-1]])
        D.append((features, int(fields[-1])))
    return D


def get_data(url):
    """Download the data file at ``url`` and parse it with ``parse_data``.

    Raises:
        ImportError: if the ``requests`` package is not installed.
        requests.HTTPError: if the server answers with an error status.
    """
    if requests is None:
        raise ImportError("the 'requests' package is required to download data")
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    return parse_data(response.text)


def multi_dim_stump(D):
    """Run ``stump_decision`` on every feature and keep the lowest-error stump.

    Returns:
        ``(s, theta, dim, err)``; ties go to the lowest feature index.

    Raises:
        IndexError: if ``D`` is empty.
    """
    N = len(D)
    best = (0, 0, 1 << 61)  # sentinel error larger than any real count
    best_dim = -1
    for dim in range(len(D[0][0])):
        candidate = stump_decision(D, N, dim)
        if candidate[2] < best[2]:
            best = candidate
            best_dim = dim
    return (best[0], best[1], best_dim, best[2])


def calc_Eout(D, s, theta, dim):
    """Return the fraction of ``D`` misclassified by ``s * sign(x[dim] - theta)``.

    Raises:
        ZeroDivisionError: if ``D`` is empty.
    """
    wrong = sum(1 for x, y in D if s * sign(x[dim] - theta) != y)
    return wrong / len(D)


def main():
    """Train on the hw2 training file and report E_in and E_out."""
    url_train = 'https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_math/hw2_train.dat'
    D_train = get_data(url_train)
    g = multi_dim_stump(D_train)
    print(f"Selected hypothesis is: s = {g[0]}, theta = {g[1]}, dim = {g[2]}, E_in = {g[3]/len(D_train)}")

    url_test = 'https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_math/hw2_test.dat'
    D_test = get_data(url_test)
    E_out = calc_Eout(D_test, g[0], g[1], g[2])
    print(f"E_out = {E_out}")


if __name__ == "__main__":
    main()
# 1-D decision stump simulation (small sample size): repeatedly draw N noisy
# points, fit the best stump, and compare its in-sample error with the
# closed-form out-of-sample error.

import math
import random

import numpy as np

# matplotlib is only needed to draw the histogram.  It is imported defensively
# so the numerical helpers stay importable where it is not installed.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

N = 20


def sign(x):
    """Return +1 when ``x`` is strictly positive, otherwise -1 (so sign(0) == -1)."""
    return 1 if x > 0 else -1


def gen_x(lb=-1, ub=1):
    """Draw ``N`` points uniformly from ``[lb, ub)`` as a numpy array."""
    return np.random.uniform(lb, ub, size=N)


def gen_y(X):
    """Label each x with ``sign(x)``, flipping the label with probability 0.2."""
    Y = []
    for x in X:
        y = sign(x)
        Y.append(-y if random.random() <= 0.2 else y)
    return Y


def stump_decision(D):
    """Find the best 1-D decision stump ``h(x) = s * sign(x - theta)`` on ``D``.

    Args:
        D: sequence of ``(x, y)`` pairs with ``y`` in {+1, -1}.  It does not
            have to be sorted, and its own length is used rather than the
            module-level ``N``.

    Returns:
        ``(s, theta, err)`` where ``err`` is the number of points in ``D`` the
        returned hypothesis misclassifies.  An empty ``D`` yields ``(1, 0, 0)``.
    """
    n = len(D)
    if n == 0:
        return (1, 0, 0)
    D = sorted(D, key=lambda d: d[0])

    # Threshold for "every point is on the positive side of the cut": halfway
    # between the smallest x and the lower end of the sampling interval, -1.
    # Assumes the inputs are > -1, as produced by gen_x() with its defaults.
    below_all = (D[0][0] + -1) / 2

    # With the threshold below every point, s=+1 predicts all +1 and s=-1
    # predicts all -1; these are the starting error counts for the two scans.
    err_all_pos = sum(1 for d in D if d[1] != 1)
    err_all_neg = n - err_all_pos

    best = None
    for s, err in ((1, err_all_pos), (-1, err_all_neg)):
        if best is None or err < best[2]:
            best = [s, below_all, err]
        for k in range(1, n):
            # Moving the cut past point k-1 flips its prediction from s to -s:
            # one more mistake if its label is s, one fewer otherwise.
            err += 1 if D[k - 1][1] == s else -1
            lo, hi = D[k - 1][0], D[k][0]
            if lo == hi:
                # No threshold separates two equal values, so this running
                # count does not correspond to any real hypothesis.
                continue
            if err < best[2]:
                best = [s, (lo + hi) / 2, err]
    return tuple(best)


def expected_eout(s, theta):
    """Closed-form error of the stump for x uniform on [-1, 1] and 20% label flips."""
    return 0.5 + 0.3 * s * (np.abs(theta) - 1)


def run_trials(T):
    """Run ``T`` independent experiments.

    Returns:
        ``(ein_sum, eout_sum, diff_sum, diff_list)``: the sums of E_in, E_out
        and E_in - E_out over all trials, plus the per-trial differences.
    """
    ein_sum = 0.0
    eout_sum = 0.0
    diff_sum = 0.0
    diff_list = []
    for _ in range(T):
        X = gen_x()
        X.sort()
        Y = gen_y(X)
        D = list(zip(X, Y))
        s, theta, err = stump_decision(D)
        E_in = err / len(D)
        E_out = expected_eout(s, theta)
        ein_sum += E_in
        eout_sum += E_out
        diff_sum += E_in - E_out
        diff_list.append(E_in - E_out)
    return ein_sum, eout_sum, diff_sum, diff_list


def plot_histogram(diff_list):
    """Show a 100-bin histogram of the per-trial ``E_in - E_out`` values.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required to draw the histogram")
    fig, ax = plt.subplots()
    num_bins = 100
    ax.hist(diff_list, num_bins, facecolor='lightskyblue', alpha=0.5)
    if N == 20:
        ax.set_title("Figure 1: 1-D decision stump with small N")
    else:
        ax.set_title("Figure 2: 1-D decision stump with big N")
    ax.set_xlabel(r"$E_{in} - E_{out}$")
    ax.set_ylabel(r"frequency")
    fig.tight_layout()
    plt.grid(True)
    plt.show()


def main():
    """Run 1000 trials, plot the histogram, then print the averages."""
    T = 1000
    Ein_avg, Eout_avg, diff_avg, diff_list = run_trials(T)
    plot_histogram(diff_list)
    print(f"Average E_in = {Ein_avg/T}")
    print(f"Average E_out = {Eout_avg/T}")
    print(f"Average E_in - E_out = {diff_avg/T}")


if __name__ == "__main__":
    main()
# 1-D decision stump simulation (large sample size): repeatedly draw N noisy
# points, fit the best stump, and compare its in-sample error with the
# closed-form out-of-sample error.

import math
import random

import numpy as np

# matplotlib is only needed to draw the histogram.  It is imported defensively
# so the numerical helpers stay importable where it is not installed.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

N = 2000


def sign(x):
    """Return +1 when ``x`` is strictly positive, otherwise -1 (so sign(0) == -1)."""
    return 1 if x > 0 else -1


def gen_x(lb=-1, ub=1):
    """Draw ``N`` points uniformly from ``[lb, ub)`` as a numpy array."""
    return np.random.uniform(lb, ub, size=N)


def gen_y(X):
    """Label each x with ``sign(x)``, flipping the label with probability 0.2."""
    Y = []
    for x in X:
        y = sign(x)
        Y.append(-y if random.random() <= 0.2 else y)
    return Y


def stump_decision(D):
    """Find the best 1-D decision stump ``h(x) = s * sign(x - theta)`` on ``D``.

    Args:
        D: sequence of ``(x, y)`` pairs with ``y`` in {+1, -1}.  It does not
            have to be sorted, and its own length is used rather than the
            module-level ``N``.

    Returns:
        ``(s, theta, err)`` where ``err`` is the number of points in ``D`` the
        returned hypothesis misclassifies.  An empty ``D`` yields ``(1, 0, 0)``.
    """
    n = len(D)
    if n == 0:
        return (1, 0, 0)
    D = sorted(D, key=lambda d: d[0])

    # Threshold for "every point is on the positive side of the cut": halfway
    # between the smallest x and the lower end of the sampling interval, -1.
    # Assumes the inputs are > -1, as produced by gen_x() with its defaults.
    below_all = (D[0][0] + -1) / 2

    # With the threshold below every point, s=+1 predicts all +1 and s=-1
    # predicts all -1; these are the starting error counts for the two scans.
    err_all_pos = sum(1 for d in D if d[1] != 1)
    err_all_neg = n - err_all_pos

    best = None
    for s, err in ((1, err_all_pos), (-1, err_all_neg)):
        if best is None or err < best[2]:
            best = [s, below_all, err]
        for k in range(1, n):
            # Moving the cut past point k-1 flips its prediction from s to -s:
            # one more mistake if its label is s, one fewer otherwise.
            err += 1 if D[k - 1][1] == s else -1
            lo, hi = D[k - 1][0], D[k][0]
            if lo == hi:
                # No threshold separates two equal values, so this running
                # count does not correspond to any real hypothesis.
                continue
            if err < best[2]:
                best = [s, (lo + hi) / 2, err]
    return tuple(best)


def expected_eout(s, theta):
    """Closed-form error of the stump for x uniform on [-1, 1] and 20% label flips."""
    return 0.5 + 0.3 * s * (np.abs(theta) - 1)


def run_trials(T):
    """Run ``T`` independent experiments.

    Returns:
        ``(ein_sum, eout_sum, diff_sum, diff_list)``: the sums of E_in, E_out
        and E_in - E_out over all trials, plus the per-trial differences.
    """
    ein_sum = 0.0
    eout_sum = 0.0
    diff_sum = 0.0
    diff_list = []
    for _ in range(T):
        X = gen_x()
        X.sort()
        Y = gen_y(X)
        D = list(zip(X, Y))
        s, theta, err = stump_decision(D)
        E_in = err / len(D)
        E_out = expected_eout(s, theta)
        ein_sum += E_in
        eout_sum += E_out
        diff_sum += E_in - E_out
        diff_list.append(E_in - E_out)
    return ein_sum, eout_sum, diff_sum, diff_list


def plot_histogram(diff_list):
    """Show a 100-bin histogram of the per-trial ``E_in - E_out`` values.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required to draw the histogram")
    fig, ax = plt.subplots()
    num_bins = 100
    ax.hist(diff_list, num_bins, facecolor='lightskyblue', alpha=0.5)
    if N == 20:
        ax.set_title("Figure 1: 1-D decision stump with small N")
    else:
        ax.set_title("Figure 2: 1-D decision stump with big N")
    ax.set_xlabel(r"$E_{in} - E_{out}$")
    ax.set_ylabel(r"frequency")
    fig.tight_layout()
    plt.grid(True)
    plt.show()


def main():
    """Run 1000 trials, plot the histogram, then print the averages."""
    T = 1000
    Ein_avg, Eout_avg, diff_avg, diff_list = run_trials(T)
    plot_histogram(diff_list)
    print(f"Average E_in = {Ein_avg/T}")
    print(f"Average E_out = {Eout_avg/T}")
    print(f"Average E_in - E_out = {diff_avg/T}")


if __name__ == "__main__":
    main()