Series, DataFrame, and Index# In[1]
import numpy as np 
import pandas as pd# In[2]
# A Series built from a plain list of floats; no index is supplied here.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
)
data  # Out[2]
0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64
The values and index attributes expose the data and the index of a Series.
# In[3]
# Show the two components of the Series: its values and its index object.
print(data.values)
print(data.index)# Out[3]
[0.25 0.5  0.75 1.  ]
RangeIndex(start=0, stop=4, step=1)# In[4]
# Square-bracket access: a single item first, then a slice. As the output
# below shows, the slice 1:3 returns the entries labelled 1 and 2.
print(data[1])
print(data[1:3])# Out[4]
0.5
1    0.50
2    0.75
dtype: float64# In[5]
# Same values as before, but with explicit string labels as the index.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=['b', 'a', 'd', 'c'],
)
data  # Out[5]
b    0.25
a    0.50
d    0.75
c    1.00
dtype: float64# In[6]
# Look up a single value by its string index label.
data['b']# Out[6]
0.25# In[7]
# State populations keyed by state name; the dict keys become the index
# of the resulting Series.
population_dict = {
    'California': 39538223,
    'Texas': 29145505,
    'Florida': 21538187,
    'New York': 20201249,
    'Pennsylvania': 13002700,
}
population = pd.Series(population_dict)
population  # Out[7]
California      39538223
Texas           29145505
Florida         21538187
New York        20201249
Pennsylvania    13002700
dtype: int64# In[8]
# Dictionary-style lookup of one entry by its index label.
population['California']# Out[8]
39538223# In[9]
# Slice by index label. As the output below shows, the end label
# 'Florida' is included in the result.
population['California':'Florida']# Out[9]
California    39538223
Texas         29145505
Florida       21538187
dtype: int64
pd.Series(data, index=index)
index is an optional argument, and data can be one of many entities.
data can be a list or NumPy array, like this:
# In[10]
# Build a Series from a plain list with no index argument; the output
# below shows the integer labels 0, 1, 2.
pd.Series([2,4,6])# Out[10]
0    2
1    4
2    6
dtype: int64
- data can be a scalar, which is repeated to fill the specified index:
# In[11]
# A scalar value is repeated once for every label in the given index.
pd.Series(5,index=[100,200,300])# Out[11]
100    5
200    5
300    5
dtype: int64# In[12]
# Build a Series from a dict: the keys become the index. The output below
# keeps them in the order they were written (2, 1, 3).
pd.Series({2:'a',1:'b',3:'c'})# Out[12]
2    a
1    b
3    c
dtype: object# In[13]
# With an explicit index, only the listed keys are kept, in the order
# given; key 3 does not appear in the output below.
pd.Series({2:'a',1:'b',3:'c'},index=[1,2])# Out[13]
1    b
2    a
dtype: object# In[14]
# State areas keyed by the same state names used for the population Series.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'Florida': 170312,
    'New York': 141297,
    'Pennsylvania': 119280,
}
area = pd.Series(area_dict)
area  # Out[14]
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
dtype: int64# In[15]
# Combine the two Series into one DataFrame; each dict key names a column.
states = pd.DataFrame(
    {
        'population': population,
        'area': area,
    }
)
states  # Out[15]
              population	  area
California	    39538223	423967
Texas	        29145505	695662
Florida	        21538187	170312
New York	    20201249	141297
Pennsylvania	13002700	119280# In[16]
# Row labels of the DataFrame.
states.index# Out[16]
Index(['California', 'Texas', 'Florida', 'New York', 'Pennsylvania'], dtype='object')# In[17]
# Column labels; the output below shows these are held in an Index as well.
states.columns# Out[17]
Index(['population', 'area'], dtype='object')# In[18]
# Select one column by name; the output below is a Series named 'area'.
states['area']# Out[18]
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
Name: area, dtype: int64# In[19]
# Build a single-column DataFrame from one Series, naming the column.
pd.DataFrame(population,columns=['population'])# Out[19]
              population
California	    39538223
Texas	        29145505
Florida	        21538187
New York	    20201249
Pennsylvania	13002700# In[20]
# A list of dicts: each dict is one row, and its keys name the columns.
data = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data)  # Out[20]
    a	b
0	0	0
1	1	2
2	2	4
Keys that are missing from some of the dictionaries are filled in with NaN (Not a Number) values.
# In[21]
# The two dicts do not share all keys; the output below shows NaN wherever
# a row has no value for a column.
pd.DataFrame([{'a':1,'b':2},{'b':3,'c':4}])# Out[21]
      a	b	  c
0	1.0	2	NaN
1	NaN	3	4.0# In[15]# In[22]
# Build a DataFrame from a 3x2 array of random numbers with explicit column
# and row labels. NOTE(review): no seed is set in the visible cells, so the
# values will presumably differ from the sample output on each run.
pd.DataFrame(np.random.rand(3,2),columns=['foo','bar'],index=['a','b','c'])# Out[22]
         foo	     bar
a	0.466496	0.888614
b	0.228347	0.613272
c	0.912784	0.961023# In[23]
# A zero-filled NumPy structured array with two named fields:
# 'A' (8-byte integer) and 'B' (8-byte float).
A = np.zeros(
    3,
    dtype=[('A', 'i8'), ('B', 'f8')],
)
A  # Out[23]
array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])# In[24]
# Build a DataFrame from the structured array; the output below shows the
# field names A and B as the column names.
pd.DataFrame(A)# Out[24]
    A	  B
0	0	0.0
1	0	0.0
2	0	0.0# In[25]
# An Index built directly from a list of integers.
ind = pd.Index(
    [2, 3, 5, 7, 11],
)
ind  # Out[25]
Int64Index([2, 3, 5, 7, 11], dtype='int64')# In[26]
# Index a single element, take every second element with a slice, then
# print the size, shape, ndim and dtype attributes of the Index.
print(ind[1])
print(ind[::2])
print(ind.size, ind.shape, ind.ndim, ind.dtype)# Out[26]
3
Int64Index([2, 5, 11], dtype='int64')
5 (5,) 1 int64
An Index also behaves much like Python's built-in set data structure, so that unions, intersections, differences, and other combinations can be computed in a familiar way.
# In[27]
# Two overlapping integer indices (3, 5 and 7 appear in both).
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])  # In[28]
# Set-style combinations of the two indices: elements in both, elements in
# either, and elements in exactly one of them (see the output below).
print(indA.intersection(indB))
print(indA.union(indB))
print(indA.symmetric_difference(indB))# Out[28]
Int64Index([3, 5, 7], dtype='int64')
Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
Int64Index([1, 2, 9, 11], dtype='int64')
글이 많은 도움이 되었습니다, 감사합니다.