stata memo

Stata memo

1. Drawing

(1) Time trend graph

label var year "年份"
label var per "制造业增加值比重[左轴]"
label var tjj "工业增加值比重[右轴]"
graph twoway (connect per year ,yaxis(1) color(black) )   ///
(connect tjj year ,yaxis(2) color(black) lpattern(dash)  ) ///
, graphregion(color(white))  xlabel(2003(2)2019) ///
ytitle("世界银行制造业增加值比重(%)",axis(1) height(5))  ///
ytitle("国家统计局工业增加值比重(%)",axis(2) height(5))  ///
note(数据来源:World Bank Open Data、国家统计局) xline(2011) 

Time Trend Figure 1
Equivalent command

tw (connect value1819 season , /// 
	lcolor(black) lpattern(dash) msymbol(O) mlcolor(gs5) mfcolor(gs12)) /// 
(connect value2020 season , ///
	lcolor(black) lpattern(solid) msymbol(S) mlcolor(gs5) mfcolor(gs12)) ///
,graphregion(color(white)) ///
legend(label(1 "18-19年平均") label(2 "2020年") ) ///
xlabel(1 "第一季度"  2 "第二季度"  3 "第三季度"  4 "第四季度"  ,labsize(small) )
label var year "年份"

tw bar mR1 year,yaxis(2) bc(balck) sort barwidth(0.9) fintensity(inten0) ///
ylabel(0(2000)6000, axis(2)) /// 
xlabel(2014(1)2021)|| /// 
connect percent_R year,yaxis(1) lc(black) lp(dash) mc(blace) ///
ylabel(0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" ,axis(1))  ||, ///
graphregion(color(white) ) ///
bgcolor(white) ///
title("中国数字内容企业(游戏)收入金额及占全球市场比重", c(black) size(*0.8)) ///
ytitle("占比(%)",axis(1) height(7))  ///
ytitle("收入额(百万美元)",axis(2) height(5))  /// 
legend(label(1 "中国数字内容企业(游戏)收入占全球市场比重") label(2 "中国数字内容企业(游戏)收入金额") ) ///
legend(size(small) col(1)) ///
note("数据源自:app annie")

graph save "Graph" "$path\output\playdata_1_percent_and_value_of_Chinese_Apps_Export.gph",replace

Time Trend Figure 2

use hs_adj_year_PQV_2000_2015.dta,clear
use hs_adj_year_PQV_2000_2015_cregime10,clear

merge m:1 hs_adj using equipment
replace BEC=4 if BEC==1 & equipment!=1

destring hs_adj,replace

reghdfe lnV i.year if year!=2006 & BEC==2, a(hs)
est store result_accessories

reghdfe lnV i.year if year!=2006 & BEC==4, a(hs)
est store result_equipment

reghdfe lnV i.year if year!=2006 & BEC==0, a(hs)
est store result_noncapital



#d ;
coefplot 
  (result_accessories,c(l) label("accessories") lp(dash) lc(black) mc(black) ms(smcircle_hollow) offset(-0.07))  
  (result_equipment ,c(l)  label("equipment") lp(solid) lc(black) mc(black) ms(smcircle_hollow))
  (result_noncapital,c(l)  label("noncapital") lp(dot) lc(black) mc(black) ms(smcircle_hollow) offset(0.07))
  , vertical 
  drop(_cons) byopts(xrescale) 
  xlabel(1 "2001"  3"2003"  5"2005" 6"2007"  8"2009"  10"2011" 12"2013" 14"2015")
  graphregion(color(white))
  legend(size(small) col(3))
  ;
#d cr

Time Trend Figure 3

font size option
font size option description
zero no size whatsoever, vanishingly small
minuscule smallest
quarter_tiny
third_tiny
half_tiny
tiny
vsmall
small
medsmall
medium
co-large
large
vlarge
huge
shut up largest
tenth one-tenth the size of the graph
quarter one-fourth the size of the graph
third one-third the size of the graph
half one-half the size of the graph
full text the size of the graph
size any size you want
Node style eg: msymbol(O) mlcolor(gs5) mfcolor(gs12)
symbolstyle Synonym(if any) Description
circle O solid
diamond D solid
triangle T solid
square S solid
plus +
X X
arrowf A filled arrow head
arrow a
pipe
V V
smcircle o solid
smdiamond d solid
smsquare s solid
smtriangle t solid
smplus
smx x
smv v
circle_hollow Oh hollow
diamond_hollow Dh hollow
triangle_hollow Th hollow
square_hollow Sh hollow
smcircle_hollow oh hollow
smdiamond_hollow dh hollow
smtriangle_hollow th hollow
smsquare_hollow sh hollow
point p a small dot
none i a symbol that is invisible
line style
linepatternstyle Description
solid solid line
dash dashed line
dot dotted line
dash_dot
shortdash
shortdash_dot
longdash
longdash_dot
blank invisible line
formula e.g.,-. or --… etc.
A formula is composed of any combination of
l solid line
_ (underscore) a long dash
- (hyphen) a medium dash
. short dash (almost a dot)
# small amount of blank space
颜色
black edkblue gs12 lime orange
blue eggshell gs13 ltblue orange_red
bluishgray eltblue gs14 ltbluishgray pink
bluishgray8 eltgreen gs15 ltbluishgray8 purple
brown emerald gs16 ltkhaki red
chocolate emidblue gs2 magenta sand
cranberry erose gs3 maroon sandb
cyan forest_green gs4 midblue sienna
dimgray gold gs5 midgreen stone
dkgreen gray gs6 mint sunflowerlime
dknavy green gs7 navy teal
dkorange gs0 gs8 navy8 white
ebblue gs1 gs9 none yellow
ebg gs10 khaki olive
edkbg gs11 lavender olive_teal

(2)柱状图

#delimit ;
graph bar cn_wzje_80 cn_wzje_81 if wzlx==0, over(sec) bargap(-30)
  ytitle("吸引外资金额")
  legend( label(1 "08年前") label(2 "08年后") )
  title("各行业吸引外资2008年前后对比")
  subtitle("中国中西部服务业")
  note("中国中西部服务业") ;
#delimit cr

insert image description here

(3)散点图

tw (scatter delta_v2_v3 delta_v1_v2 if delta_v1_v2>=-0.3& delta_v1_v2 <=2.3 ///
&delta_v2_v3>= -0.3&delta_v2_v3<=2.3, ///
mlabel(hy4) mlc(black) mlabc(black) ms(x) mlabs(tiny)) ///
(fun y=x,range(-0.3 2.3)) , ///
xlab(-0.3(0.5)2.3) ylab(-0.3(0.5)2.3)  ///
graphregion(color(white)) ///
xline(0,lp(dash) lc(gs10)) ///
yline(0,lp(dash) lc(gs10)) ///
legend(ring(0) pos(5) order(2 "45°线")) ///
ytitle("11-15时段的增速") ///
xtitle("07-11时段的增速")

insert image description here

graph twoway (scatter wzje_CE0308 wzje_CE1419, mlabel(sec) mlabv(sec) ) (function y=x, range(0 0.11)) , ///
title("中国东部地区03-08对比中国东部地区14-19[金额]")  ///
ytitle("中国东部03-08") ///
xtitle("中国东部14-19") ///
legend(ring(0) pos(5) order(2 "45°线")) ///
graphregion(color(white))

insert image description here

graph twoway (scatter c_AS c_WD, mlabel(cic03)  ) (lfit c_AS c_WD) , ///
title("东盟增速放缓 vs 世界增速放缓")  ///
ytitle("东盟")  ///
xtitle("世界") ///
legend(ring(0) pos(5) order(2 "拟合")) ///
graphregion(color(white))

(4)bgshade

bgshade ks, shaders(uu9)  ///
   twoway(connect lamda22 ks if treat==1&ks>=6&ks<=11 ||             ///  
          connect lamda22 ks if treat==0&ks>=6&ks<=11 , xlab(6(1)11) ///
   title("新冠疫情冲击下企业平均收入变化趋势"))

insert image description here

(5)coefplot

coefplot,  levels(90) vertical lcolor(black)mcolor(black) ///
 msymbol(circle_hollow) ytitle(估计系数, size(small)) ///
 ylabel(, labsize(small) angle(horizontal) nogrid) ///
 yline(0, lwidth(vthin)lpattern(solid) lcolor(black)) ///
 xtitle(事件发生时间, size(small)) ///
  title("(B)企业缴税的平行趋势检验") ///
 xlabel(0"." 1"2019s2" 2"2019s3" 3"2019s4" 4"2020s1" 5"2020s2" 6"2020s3" 7"2020s4") 

insert image description here

reghdfe lnQ i.Year if elec == 1,a(i.citycode) vce(r)
est store elec_Q_1 

reghdfe lnV i.Year if elec == 1,a(i.citycode) vce(r)
est store elec_V_1 

reghdfe lnQ i.Year if elec == 0,a(i.citycode) vce(r)
est store elec_Q_0

reghdfe lnV i.Year if elec == 0,a(i.citycode) vce(r)
est store elec_V_0

coefplot (elec_Q_1,label("半导体电子元件相关企业进口数量") offset(0.05)  pstyle(p3)) ///
(elec_Q_0 ,label("非半导体电子元件相关企业进口数量") offset(-0.05)  pstyle(p4) ), ///
vertical drop(_cons) xline(0) ///
graphregion(color(white)) /// 
yline(0) ///
addplot(line @b @at,lp(dash) lwidth(*0.5)) /// 
legend(label(1 "半导体电子元件相关企业进口数量") label(2 "非半导体电子元件相关企业进口数量") ) 

insert image description here

(6)画系数和置信区间

twoway (scatter coef week) /// 
(rcap ci_lower ci_upper week, /// 
 lcolor(black) /// 
 mcolor(black) /// 
 lwidth(vthin) /// 
 lpattern(dash) ///
 msymbol(circle_hollow) ///
 legend(label(2 "99% CI"))) , ///
yline(0) ///
xtitle("")  ///
graphregion(fcolor(white)) ///
title("第X周的系数", size(medium)) /// 
name("Coef_all_I", replace)

(7)画直方图

一般使用kdensity

hist year if year>=1400 & year<=2010, freq bin(200) ylabel(0(500)2500) xtitle("Year") xline(1950 1980,lw(thin)) ///
     text(1500 1950 "Year=1950", place(w)) text(2000 1980 "Year=1980", place(w)) 

insert image description here

(8)画桑基图

cd $path\appdata
use Data_games.dta,clear
merge m:1 ParentCompanyName using "$path\data\company_city"
keep if _m == 3
drop _m
gen from = city_code
encode iso3_j,gen(to)
bys from to :egen tR = total(Revenue)
bys from to :egen tD = total(Downloads)
duplicates drop  from to ,force
gen x0 = 1
gen x1 = 2
tostring city_code ,gen(city2)
drop if dest == "CHN"

sankey_plot x0 from x1 to, ///
width0(tR) extra xlabel(1 "Source" 2 "Destination", nogrid labsize(small)) ///
colorpalette(economist, opacity(30)) ///
label0(city) label1(iso3_j) ///
labsize(*0.6) labcolor(black) ///
graphregion(color(white))  gap(0.1) ///
title("地级市层面Apps出海流向(按收入额)",color(black) size(*0.8))

graph save "Graph" "$path\output\sankey_R_0228.gph",replace


sankey_plot x0 from x1 to, ///
width0(tD) extra xlabel(1 "Source" 2 "Destination", nogrid labsize(small)) ///
colorpalette(economist, opacity(30)) ///
label0(city) label1(iso3_j) ///
labsize(*0.6) labcolor(black) ///
graphregion(color(white))  gap(0.1) ///
title("地级市层面Apps出海流向(按下载量)",color(black) size(*0.8))

graph save "Graph" "$path\output\sankey_D_0228.gph",replace

insert image description here

(9)气泡图

twoway(scatter mv T_gap_05_00 [fweight=N] if BEC == 0&T_gap_05_00!=0&N !=0,msymbol(Oh) mc(ebblue%40)) ///
(scatter mv T_gap_05_00 [fweight=N] if BEC == 4&T_gap_05_00!=0&N !=0,msymbol(Oh) mc(orange_red%40)) ///
(scatter mv T_gap_05_00 [fweight=N] if BEC == 2&T_gap_05_00!=0&N !=0,msymbol(Oh) mc(green%20)) ///
, legend(label(1 "非资本品") label(2 "equipment") label(3 "accessories") ) 

insert image description here

2. 处理数据

(1)拓展expand数据

For example: there are 6 samples of 9523, 9524, 9525, 9526, 9527, and 9528 in the current data. Now I want to expand these 6 samples according to freq to become 9523, 9524_1, 9524_2, 9525_1, 9525_2, 9526_1, 9526_2, 11 samples of 9527_1, 9527_2, 9528_1, 9528_2

freq count value
1 9523 4845.1143
2 9524 969.66498
2 9525 129.53349
2 9526 71284.508
2 9527 1038.127
2 9528 445877.09

count is the unique identification code of id, the expandcl function can generate samples with the same line as freq, and generate a new id identification code freq_count

egen count=group(id hs02_6)
expandcl freq,gen(freq_count) cluster(count)
drop freq_count

(2) Time data

gen R= mdy(month_r,day_r,year_r)
gen week_r = week(R)
gen day_r = day(R)
gen dow_r = dow(R)  //返回周几
gen doy_r = doy(R)  //返回年内日期
gen yw_r = yw(year_r,week_r)
gen ed = yw - yw_r
//yw ym yq yh分别为年周、年月、年季、年半年
gen period_kb= date(date_u,"YMD")-date(date_kb,"YMD")

(3) Common functions

int(x) //取整,不论后面的小数是什么,只取小数点前的数值
round(x) // 四舍五入取整
round(x, .01) //保留两位小数四舍五入
gen y = sum(x)  //求列累积和
egen y = sum(x) //求列总和
egen y = rsum(x y z) //求x+y+z总和
egen y = rowmean(x y z) //求(x+y+z)/3
egen y = rowsd(x y z) //求x y z的方差
egen y = rowmim(x y z) //求x y z的最小值
egen y = rowmax(x y z) //求x y z的最大值
egen y = mean(x)      //求列均值
egen y = median(x)    //求列中位数
egen y = std(x)       //求变异系数,与方差不同

bysort x(y): gen z = y[1] //按照x分组,分组后按照y排序,生成一个新变量z=y的第一个观察值

(4) shrink tail processing

foreach v of var DexpoAS4- DlnexpoWD2{
gen `v'_w=`v'
qui su `v',det
replace `v'_w=r(p99) if `v'>r(p99) & `v'<.
replace `v'_w=r(p1) if `v'<r(p1)
}

winsor2 wage, replace cuts(1 99) trim

summary After a variable, the results that can be returned are

r(N)           //number of observations
r(mean)        //mean
r(skewness)    //skewness (detail only)
r(min)         //minimum
r(max)         //maximum
r(sum_w)       //sum of the weights
r(p1)          //1st percentile (detail only)
r(p5)          //5th percentile (detail only)
r(p10)         //10th percentile (detail only)
r(p25)         //25th percentile (detail only)
r(p50)         //50th percentile (detail only)
r(p75)         //75th percentile (detail only)
r(p90)         //90th percentile (detail only)
r(p95)         //95th percentile (detail only)
r(p99)         //99th percentile (detail only)
r(Var)         //variance
r(kurtosis)    //kurtosis (detail only)
r(sum)         //sum of variable
r(sd)          //standard deviation

(5) Create a folder

Generate a workspace folder under the project path
and then generate subfolders to store data (data), control variables (controlvars), temporary data (tempdata), and regression results (outreg)

efolder, cd(D:\stata15\project\workspace)
efolder, cd(D:\stata15\project\workspace sub(data controlvars tempdata outreg)

(6) Group processing data (alternative to bysort)

*展示根据highzupu50(族谱)和year分组后的变量drqianfen(死亡率)均值;
collapse (mean) drqianfen, by(highzupu50 year)

(7) Define a sample without missing

g rsample = !mi(avggrain_fyr) & !mi(nograin_fyr) & !mi(urban_fyr)& !mi(dis_bj_fyr) & !mi(dis_pc_fyr) & !mi(migrants_fyr)& !mi(rice_fyr) & !mi(minor_fyr) & !mi(edu_fyr)

(8) Define a new alternative for Dummy (time range)

*如果yob满足1825≤yob≤1899则pre取值为1,否则pre取值为0。mid、post生成过程类似。
gen pre = inrange(yob, 1825, 1899)
gen mid = inrange(yob, 1899, 1919)
gen post = inrange(yob, 1920, 1960)

(9) Quick replacement

recode treatyear (1969 = 1) (1979 = 2) (1989 = 3) (1999 = 4) (2009 = 5)

3. Processing characters

(1) Replace character

replace 候选人姓名=subinstr( 候选人姓名, " ", "",. )

(2) Capture certain features in characters

keep if strmatch(city, "*山东*")
gen temp = 1 if strmatch(reporteriso3, "A*")

(3) Extract characters, retrieve specific characters

//从enddate字符1开始取,取4个字符赋给year
gen year = substr(enddate,1,4)  

//strpos(s1, s2)返回字符s2在s1中的位置,如果s1中找不到s2,则返回0,将该判断再赋给y
gen y = strpos(s1, s2) != 0    

4. Output the result

(1) Regular output

outreg2 using "E:\mfg\outreg\r2", word append addtext(CountryFE, YES,YearFE, YES)

(2) The first stage of iv regression output

eststo: xtivreg p_a_w (DexpoCN4_w=dexpo44) /// 
 i.t c.expr0#t c.Lshare0#t /// 
 c.lnGDP0#t c.lngdp0#t, fe first vce(cluster c)  
 
 eststo: xtreg DexpoCN4_w dexpo44 /// 
 i.t c.expr0#t c.Lshare0#t /// 
 c.lnGDP0#t c.lngdp0#t if e(sample)==1 ,fe  
 cd $path\outreg 
 outreg2 using "table3", word replace addtext(CityFE, YES,YearFE, YES) keep(dexpo44) 

(3) Variable descriptive statistics

*列出inv等变量的样本数、均值、标准差、最小值和最大值。
tabstat inv loginv log_levies ///
     logpopl logincome logasset hhsize landpc logmigration logtax logtransfer share_admin  ///
	 postcont postopen secret_ballot proxy_voting moving_ballot ///
	 , s(N mean sd min max) c(s)

5. Matrix save result

mat T1 = J(3,3,.)

reghdfe temp ib1.season if year == 2018 & treat == 1,noa
forvalues i = 1/3{
	local j = `i' + 1
	mat T1[`i',1] = _b[`j'.season]
}

reghdfe temp ib1.season if year == 2019 & treat == 1,noa
forvalues i = 1/3{
	local j = `i' + 1
	mat T1[`i',2] = _b[`j'.season]
}

reghdfe temp ib1.season if year == 2020 & treat == 1,noa
forvalues i = 1/3{
	mat T1[`i',3] = _b[`=1+`i''.season]
}

svmat T1

6. Import data (full string)

forv i = 2000/2003{
	cd E:\Data\EPS工企海关匹配库\origindata
	import delimited "工企+海关(`i').csv", stringcols(_all) clear 
	cd E:\Data\EPS工企海关匹配库
	save data`i'.dta,replace
}

7. Loop

clear all
set obs 1000

**#** 用forvalues循环对单一变量进行处理

gen id = .    
//生成一个变量名为id,代表第几个人,假设一共有50个人
//假设每个人都有20个观测值,代表20年

forvalues i = 1/50 {
	local j = `i' - 1				//暂时定义0~49 方便计算
	local lower = `j' * 20  +1		//定义下限 1、21、41、61 
	local upper = `j' * 20 + 20		//定义上限 20、40、60、80
									//由此就定义了 1~20  21~40 41~60 ……
	replace id = `i' in `lower'/`upper'		//给第1~20行,赋值为第1个人
											//给第21~40行,赋值为第2个人
}

bys id : gen T = _n + 2000					//对于每个人,都生成一个时间序列



**#** 用forvalues循环对多个变量进行处理

forvalues i = 1/5 {
	gen value`i' = .
	cap gen e = rnormal()
	replace value`i' = e * 10 + `i'
	cap drop e
}

// 等价于 
gen value6 = .
cap gen e = rnormal()
replace value6 = e * 10 + 6
cap drop e

gen value7 = .
cap gen e = rnormal()
replace value7 = e * 10 + 7
cap drop e

gen value8 = .
cap gen e = rnormal()
replace value8 = e * 10 + 8
cap drop e

gen value9 = .
cap gen e = rnormal()
replace value9 = e * 10 + 9
cap drop e

gen value10 = .
cap gen e = rnormal()
replace value10 = e * 10 + 10
cap drop e


**#** 用while循环对单一变量进行处理
// 只要时间在T=11和T=20之间,就对value1~value10进行 " 乘0.1"的处理
local i = 2010 
while `i' < 2020 {
	forvalues j = 1/10{
		replace value`j' = value`j' * 0.1 if T == `i'
	}
	local i = `i' + 1
}

**#** 用foreach对变量进行处理
foreach v in value1 value2 value3 value4 value5  {
	su `v' ,d
	replace `v' = (`v' - r(min)) / (r(max) - r(min))
	kdensity `v'
}

// 等价于 
su value6,d 
replace value6 = (value6 - r(min)) / (r(max) - r(min))
kdensity value6

su value7,d 
replace value7 = (value7 - r(min)) / (r(max) - r(min))
kdensity value7

su value8,d 
replace value8 = (value8 - r(min)) / (r(max) - r(min))
kdensity value8

su value9,d 
replace value9 = (value9 - r(min)) / (r(max) - r(min))
kdensity value9

su value10,d 
replace value10 = (value10 - r(min)) / (r(max) - r(min))
kdensity value10

8. Use the first line as label/varname

* 把第一行作为变量标签
labone,nrow(1) 

* 把第一行作为变量名(不保留第一行)
nrow

* 把第一行作为变量名(保留第一行)
nrow,keep

おすすめ

転載: blog.csdn.net/weixin_43168119/article/details/129475330