作者 | 黄伟呢
来源 | 数据分析与统计学之美
今天,我继续为大家讲述Pandas如何实现R语言的相关操作。
由于 Pandas 旨在提供人们通常使用 R 完成的大量数据操作和分析功能,本文将更详细地介绍 R 语言及其众多第三方库,并说明它们与 Pandas 的对应关系。
与 R 和 CRAN 库相比,我们关心以下几点:功能与灵活性(每种工具能做什么、不能做什么)、性能(各项操作的速度)以及易用性。下面逐一给出 R 代码及其对应的 Pandas 写法:
# Column selection by name: build a frame with five columns of standard-normal
# draws, then keep only columns "a", "c" and "e".
df <- data.frame(
  a = rnorm(5),
  b = rnorm(5),
  c = rnorm(5),
  d = rnorm(5),
  e = rnorm(5)
)
df[, c("a", "c", "e")]

# Column selection by position: 1000 draws arranged as 10 rows x 100 columns,
# then several integer ranges are picked in a single index vector.
df <- data.frame(matrix(rnorm(1000), ncol = 100))
df[, c(1:10, 25:30, 40, 50:100)]
# Column selection by label: both spellings below return columns "a" and "c".
df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
df[["a", "c"]]
df.loc[:, ["a", "c"]]

# Column selection by integer position: the first seven columns carry letter
# labels, the remaining ones carry their own integer position as label.
named = list("abcdefg")
n = 30
columns = named + list(range(len(named), n))
df = pd.DataFrame(np.random.randn(n, n), columns=columns)
# np.r_ concatenates the two slices into one array of positions.
df.iloc[:, np.r_[:10, 24:30]]
# Frame with two numeric value columns (v1, v2) and two grouping columns
# (by1, by2); NA marks missing entries. Mixing strings and numbers inside c()
# coerces the grouping columns to character.
df <- data.frame(
  v1 = c(1, 3, 5, 7, 8, 3, 5, NA, 4, 5, 7, 9),
  v2 = c(11, 33, 55, 77, 88, 33, 55, NA, 44, 55, 77, 99),
  by1 = c("red", "blue", 1, 2, NA, "big", 1, 2, "red", 1, NA, 12),
  by2 = c("wet", "dry", 99, 95, NA, "damp", 95, 99, "red", 99, NA, NA)
)

# Mean of v1 and v2 for every (by1, by2) combination. The grouping vectors are
# read from `df` itself: the previous `mydf2` reference named an object that
# this script never creates, so the call failed with "object not found".
aggregate(x = df[, c("v1", "v2")], by = list(df$by1, df$by2), FUN = mean)
# Frame with two numeric value columns and two mixed-type grouping columns;
# np.nan marks the missing entries.
df = pd.DataFrame(
    {
        "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9],
        "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99],
        "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12],
        "by2": ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, np.nan],
    }
)

# Group on both key columns, then average the two value columns per group.
g = df.groupby(by=["by1", "by2"])
g[["v1", "v2"]].mean()
# Membership test: for each element of s, is it one of 2 or 4?
s <- 0:4
s %in% c(2, 4)
# Membership test on a float32 Series holding 0..4: True where the value is
# 2 or 4.
s = pd.Series(np.arange(5), dtype="float32")
s.isin([2, 4])
# Position lookup: for each element of s, its index within c(2, 4), or NA when
# the element is not present there.
s <- 0:4
match(s, c(2, 4))
# Five teams with five players each; every player gets a batting average
# drawn uniformly between 0.200 and 0.400.
baseball <- data.frame(
  team = gl(5, 5, labels = paste("Team", LETTERS[1:5])),
  player = sample(letters, 25),
  batting.average = runif(25, .200, .400)
)

# Highest batting average per team. Both vectors are read from `baseball`:
# the previous `baseball.example` reference named an object that this script
# never creates, so the call failed with "object not found".
tapply(baseball$batting.average, baseball$team, max)
# The whitespace-stripped original read `importrandom` / `importstring` and
# `forxinrange`, none of which is valid Python; the keywords are restored here.
import random
import string

# Twenty-five players spread over five teams (labels cycle team1..team5), each
# with a batting average drawn uniformly from [0.200, 0.400).
baseball = pd.DataFrame(
    {
        "team": ["team%d" % (x + 1) for x in range(5)] * 5,
        "player": random.sample(list(string.ascii_lowercase), 25),
        "battingavg": np.random.uniform(0.200, 0.400, 25),
    }
)

# One column per team holding that team's maximum batting average. The
# aggregation is given by name; recent pandas versions warn when a NumPy
# callable such as np.max is passed instead.
baseball.pivot_table(values="battingavg", columns="team", aggfunc="max")
# Row filtering: keep the rows where column a does not exceed column b.
df <- data.frame(
  a = rnorm(10),
  b = rnorm(10)
)
subset(df, a <= b)   # non-standard evaluation: column names used bare
df[df$a <= df$b, ]   # same rows via a logical row index
# Row filtering: three spellings that keep the rows where a <= b.
df = pd.DataFrame(
    {
        "a": np.random.randn(10),
        "b": np.random.randn(10),
    }
)
df.query("a<=b")                 # expression given as a string
df[df["a"] <= df["b"]]           # boolean mask through []
df.loc[df["a"] <= df["b"]]       # boolean mask through .loc
# Evaluating an expression in the context of a data frame.
df <- data.frame(
  a = rnorm(10),
  b = rnorm(10)
)
with(df, a + b)   # columns referenced by bare name inside with()
df$a + df$b       # same sum with explicit $ access
# Evaluating an expression over the columns of a DataFrame.
df = pd.DataFrame(
    {
        "a": np.random.randn(10),
        "b": np.random.randn(10),
    }
)
df.eval("a+b")       # expression given as a string
df["a"] + df["b"]    # same sum with explicit column access
# ddply() and summarize() come from plyr. library() stops with an error when
# the package is missing, whereas require() only returns FALSE and lets the
# script fail later at the first plyr call.
library(plyr)

# 120 observations of three uniform random measurements, tagged with a month
# (5..8, cycling) and a randomly drawn week (1..4, with replacement).
df <- data.frame(
  x = runif(120, 1, 168),
  y = runif(120, 7, 334),
  z = runif(120, 1.7, 20.7),
  month = rep(c(5, 6, 7, 8), 30),
  week = sample(1:4, 120, TRUE)
)

# For every (month, week) pair, report the mean and standard deviation of x,
# each rounded to two decimals.
ddply(df, .(month, week), summarize,
  mean = round(mean(x), 2),
  sd = round(sd(x), 2)
)
# 120 observations of three uniform random measurements, tagged with a month
# (5..8, cycling) and a randomly drawn week.
df = pd.DataFrame(
    {
        "x": np.random.uniform(1.0, 168.0, 120),
        "y": np.random.uniform(7.0, 334.0, 120),
        "z": np.random.uniform(1.7, 20.7, 120),
        "month": [5, 6, 7, 8] * 30,
        # randint's upper bound is exclusive: 5 is required so that weeks
        # 1..4 can all be drawn (an upper bound of 4 never produced week 4).
        "week": np.random.randint(1, 5, 120),
    }
)

# Mean and standard deviation of x for every (month, week) pair. The
# aggregations are given by name; recent pandas versions warn when NumPy
# callables such as np.mean / np.std are passed instead.
grouped = df.groupby(["month", "week"])
grouped["x"].agg(["mean", "std"])
# melt() is not part of base R; it is provided by the reshape2 package, which
# has to be attached before the first call.
library(reshape2)

# 2 x 3 x 4 array holding 1..23 plus one missing value.
a <- array(c(1:23, NA), c(2, 3, 4))

# Reshape the array to long form and wrap the result in a data frame.
data.frame(melt(a))
# 2 x 3 x 4 array holding 1..23 plus one missing value. np.nan is used because
# the np.NAN alias no longer exists in NumPy 2.x.
a = np.array(list(range(1, 24)) + [np.nan]).reshape(2, 3, 4)

# Long form: one row per cell, made of the cell's three indices followed by
# its value. (The whitespace-stripped original fused `for x, val in` into a
# single invalid token.)
pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)])
# Five-element list holding 1..4 and one NA. The base R converter is
# as.list(); `aslist` is not a function and failed with "could not find
# function".
a <- as.list(c(1:4, NA))

# Reshape the list to long form and wrap the result in a data frame.
# NOTE(review): melt() is not base R (presumably reshape2's) and needs its
# package attached before this line runs.
data.frame(melt(a))
# (position, value) pairs for the values 1..4 followed by one missing value.
# np.nan is used because the np.NAN alias no longer exists in NumPy 2.x.
a = list(enumerate(list(range(1, 5)) + [np.nan]))

# Two-column frame: position in the first column, value in the second.
pd.DataFrame(a)
# Two people, identified by first/last name, with two measurements each.
cheese <- data.frame(
  first = c("John", "Mary"),
  last = c("Doe", "Bo"),
  height = c(5.5, 6.0),
  weight = c(130, 150)
)

# Wide -> long: first/last stay as identifier columns, the remaining columns
# are stacked.
# NOTE(review): melt() is not base R (presumably reshape2's) and needs its
# package attached before this line runs.
melt(cheese, id = c("first", "last"))
# Two people, identified by first/last name, with two measurements each.
cheese = pd.DataFrame(
    {
        "first": ["John", "Mary"],
        "last": ["Doe", "Bo"],
        "height": [5.5, 6.0],
        "weight": [130, 150],
    }
)

# Wide -> long: first/last stay as identifier columns, the remaining columns
# are stacked into variable/value pairs.
pd.melt(cheese, id_vars=["first", "last"])
cheese.set_index(["first", "last"]).stack()  # alternative way
# Twelve observations of three uniform random measurements, tagged with a
# cycling month (5, 6, 7) and a cycling week (1, 2).
df <- data.frame(
  x = runif(12, 1, 168),
  y = runif(12, 7, 334),
  z = runif(12, 1.7, 20.7),
  month = rep(c(5, 6, 7), 4),
  week = rep(c(1, 2), 6)
)

# Stack x/y/z into long form, keeping month and week as identifiers, then cast
# to a week x month x variable array of means.
# NOTE(review): melt() and acast() are not base R (presumably reshape2's) and
# need their package attached before these lines run.
mdf <- melt(df, id = c("month", "week"))
acast(mdf, week ~ month ~ variable, mean)
# Twelve observations of three uniform random measurements, tagged with a
# cycling month (5, 6, 7) and a cycling week (1, 2).
df = pd.DataFrame(
    {
        "x": np.random.uniform(1.0, 168.0, 12),
        "y": np.random.uniform(7.0, 334.0, 12),
        "z": np.random.uniform(1.7, 20.7, 12),
        "month": [5, 6, 7] * 4,
        "week": [1, 2] * 6,
    }
)

# Stack x/y/z into long form, keeping month and week as identifiers.
mdf = pd.melt(df, id_vars=["month", "week"])

# Mean value per (variable, week) row and month column. The aggregation is
# given by name; recent pandas versions warn when a NumPy callable such as
# np.mean is passed instead.
pd.pivot_table(
    mdf,
    values="value",
    index=["variable", "week"],
    columns=["month"],
    aggfunc="mean",
)
# Feed amounts recorded per animal and feed type; some (animal, feed type)
# pairs occur more than once and one pair does not occur at all.
df <- data.frame(
  Animal = c(
    "Animal1", "Animal2", "Animal3", "Animal2", "Animal1",
    "Animal2", "Animal3"
  ),
  FeedType = c("A", "B", "A", "A", "B", "B", "A"),
  Amount = c(10, 7, 4, 2, 5, 6, 2)
)

# Total amount per animal (rows) and feed type (columns); combinations with
# no data are filled with NaN.
# NOTE(review): dcast() is not base R (presumably reshape2's) and needs its
# package attached before this line runs.
dcast(df, Animal ~ FeedType, sum, fill = NaN)

# Alternative method using base R
with(df, tapply(Amount, list(Animal, FeedType), sum))
# Feed amounts recorded per animal and feed type; some (animal, feed type)
# pairs occur more than once and one pair does not occur at all.
df = pd.DataFrame(
    {
        "Animal": [
            "Animal1",
            "Animal2",
            "Animal3",
            "Animal2",
            "Animal1",
            "Animal2",
            "Animal3",
        ],
        "FeedType": ["A", "B", "A", "A", "B", "B", "A"],
        "Amount": [10, 7, 4, 2, 5, 6, 2],
    }
)

# Total amount as a table: one row per animal, one column per feed type.
df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum")
# Same totals as a Series indexed by (Animal, FeedType).
df.groupby(["Animal", "FeedType"])["Amount"].sum()
# Bin six values into three intervals.
cut(c(1, 2, 3, 4, 5, 6), breaks = 3)
# Treat a numeric vector as categorical data.
factor(c(1, 2, 3, 2, 2, 3))
# Bin six values into three intervals.
pd.cut(pd.Series([1, 2, 3, 4, 5, 6]), bins=3)
# Treat an integer Series as categorical data.
pd.Series([1, 2, 3, 2, 2, 3]).astype("category")
分享
点收藏
点点赞
点在看
文章转发自AI科技大本营微信公众号,版权归其所有。文章内容不代表本站立场和任何投资暗示。
Copyright © 2021.Company 元宇宙YITB.COM All rights reserved.元宇宙YITB.COM