# In[1]
def make_df(cols, ind):
    """Quickly build a small demo DataFrame.

    Each cell holds the column label followed by the row label, e.g. the
    cell in column 'A', row 1 is the string 'A1'.

    Parameters
    ----------
    cols : iterable
        Column labels (iterating a string such as 'AB' yields 'A', 'B').
    ind : iterable
        Row labels, also used as the index of the result.

    Returns
    -------
    pandas.DataFrame
        One column per entry of `cols`, one row per entry of `ind`.
    """
    columns = {}
    for name in cols:
        # Cell text is "<column label><row label>".
        columns[name] = [f"{name!s}{label!s}" for label in ind]
    return pd.DataFrame(columns, index=ind)
# In[2]
class display(object):
    """Display the HTML representation of multiple objects side by side.

    Each positional argument is a *string* containing a Python expression
    (for example ``'df1'`` or ``'pd.concat([df1, df2])'``).  The expression
    text is used as a caption and its evaluated result is rendered below it.

    NOTE: the expressions are evaluated with ``eval``; only pass trusted,
    hand-written expression strings, never external input.
    """

    # One floating <div> per object: a monospace caption ({0}) followed by
    # the object's own HTML ({1}).  Both the <p> and the <div> must be closed,
    # otherwise later fragments end up nested inside earlier ones.
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        """Store the expression strings to be evaluated and displayed."""
        self.args = args

    def _repr_html_(self):
        """Return the HTML fragments for all expressions, joined by newlines.

        Every evaluated object must itself provide ``_repr_html_``.
        """
        return '\n'.join(self.template.format(a, eval(a)._repr_html_())
                         for a in self.args)

    def __repr__(self):
        """Return a plain-text fallback: each expression, then its repr."""
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)
Recall: Concatenation of Numpy Arrays
Simple Concatenation with pd.concat
- The pd.concat function provides a syntax similar to np.concatenate but contains a number of options.
pd.concat(objs, axis=0, join='outer', ignore_index=False, keys=None, 
levels=None, names=None, verify_integrity=False, sort=False, copy=True)
- It can be used for a simple concatenation of Series and DataFrame objects, just as np.concatenate can be used for simple concatenation of arrays.
# In[3]
# Two Series with non-overlapping integer labels (1-3 and 4-6).
ser1=pd.Series(['A','B','C'],index=[1,2,3])
ser2=pd.Series(['D','E','F'],index=[4,5,6])
# Concatenate them end to end; each value keeps its original index label.
pd.concat([ser1,ser2])
# Out[3]
1    A
2    B
3    C
4    D
5    E
6    F
dtype: object
# In[4]
# Two frames with the same columns (A, B) but different row labels.
df1=make_df('AB',[1,2])
df2=make_df('AB',[3,4])
# Show both inputs next to their row-wise concatenation (the default axis).
display('df1','df2','pd.concat([df1,df2])')
# Out[4]
df1
     A	 B
1	A1	B1
2	A2	B2
df2
     A	 B
3	A3	B3
4	A4	B4
pd.concat([df1,df2])
     A	 B
1	A1	B1
2	A2	B2
3	A3	B3
4	A4	B4
- Its default behavior is to concatenate row-wise (axis=0) within the DataFrame.
- Like np.concatenate, pd.concat allows specification of an axis along which concatenation will take place.
# In[5]
# Two frames with the same row labels (0, 1) but different columns.
df3=make_df('AB',[0,1])
df4=make_df('CD',[0,1])
# Concatenate along the column axis so the frames are placed side by side.
display('df3','df4',"pd.concat([df3,df4],axis='columns')")
# Out[5]
df3
     A 	 B
0	A0	B0
1	A1	B1
df4
     C	 D
0	C0	D0
1	C1	D1
pd.concat([df3,df4],axis='columns')
     A	 B	 C	 D
0	A0	B0	C0	D0
1	A1	B1	C1	D1
Duplicate Indices
- One important difference between np.concatenate and pd.concat is that Pandas concatenation preserves indices, even if the result will have duplicate (복사된) indices.
# In[6]
x=make_df('AB',[0,1])
y=make_df('AB',[2,3])
# Overwrite y's index with x's so that both frames use the labels 0 and 1.
y.index=x.index
# The concatenated result keeps the repeated labels (0, 1, 0, 1).
display('x','y','pd.concat([x,y])')
# Out[6]
x
     A	 B
0	A0	B0
1	A1	B1
y
     A	 B
0	A2	B2
1	A3	B3
pd.concat([x,y])
     A	 B
0	A0	B0
1	A1	B1
0	A2	B2
1	A3	B3
Treating repeated indices as an error
- If you'd like to simply verify that the indices in the result of pd.concat do not overlap, you can include the verify_integrity flag.
- With this set to True, the concatenation will raise an exception if there are duplicate indices.
Ignoring the index
- Sometimes the index itself does not matter, and you would prefer it to simply be ignored.
- This option can be specified using the ignore_index flag.
- With this set to True, the concatenation will create a new integer index for the resulting DataFrame.
# In[7]
# ignore_index=True discards the repeated labels and renumbers the rows 0..3.
display('x','y','pd.concat([x,y],ignore_index=True)')
# Out[7]
x
     A	 B
0	A0	B0
1	A1	B1
y
     A	 B
0	A2	B2
1	A3	B3
pd.concat([x,y],ignore_index=True)
     A	 B
0	A0	B0
1	A1	B1
2	A2	B2
3	A3	B3
Adding MultiIndex keys
- Another option is to use the keys option to specify a label for the data sources.
- The result will be a hierarchically indexed DataFrame containing the data.
# In[8]
# keys=['x','y'] adds an outer index level naming the frame each row came from.
display('x','y',"pd.concat([x,y], keys=['x','y'])")
# Out[8]
x
     A	 B
0	A0	B0
1	A1	B1
y
     A	 B
0	A2	B2
1	A3	B3
pd.concat([x,y], keys=['x','y'])
         A	 B
x	0	A0	B0
    1	A1	B1
y	0	A2	B2
    1	A3	B3
Concatenation with Joins
- Data from different sources might have different sets of column names, and pd.concat offers several options in this case.
# In[9]
# Frames that share only some columns: df5 has A, B, C and df6 has B, C, D.
df5=make_df('ABC',[1,2])
df6=make_df('BCD',[3,4])
# The default concatenation keeps all four columns and fills the gaps with NaN.
display('df5','df6','pd.concat([df5,df6])')
# Out[9]
df5
     A	 B	 C
1	A1	B1	C1
2	A2	B2	C2
df6
     B	 C	 D
3	B3	C3	D3
4	B4	C4	D4
pd.concat([df5,df6])
     A	 B	 C	  D
1	 A1	B1	C1	NaN
2	 A2	B2	C2	NaN
3	NaN	B3	C3	 D3
4	NaN	B4	C4	 D4
- The default behavior is to fill entries for which no data is available with NA values.
- To change this, we can adjust the join parameter of the concat function.
- By default, the join is a union of the input columns, but we can change this to an intersection of the columns using join='inner'.
# In[10]
# join='inner' keeps only the columns present in both frames (B and C).
display('df5','df6',"pd.concat([df5,df6], join='inner')")
# Out[10]
df5
     A	 B	 C
1	A1	B1	C1
2	A2	B2	C2
df6
     B	 C	 D
3	B3	C3	D3
4	B4	C4	D4
pd.concat([df5,df6], join='inner')
     B	 C
1	B1	C1
2	B2	C2
3	B3	C3
4	B4	C4
- Another useful pattern is to use the reindex method before concatenating, for finer control over which columns are dropped.
# In[11]
# Reindex df6 onto df5's columns before concatenating: column D is dropped
# and the missing column A is filled with NaN.
pd.concat([df5, df6.reindex(df5.columns, axis=1)])
# Out[11]
     A	 B	 C
1	A1	B1	C1
2	A2	B2	C2
3	NaN	B3	C3
4	NaN	B4	C4
The append Method
- Because direct array concatenation is so common, Series and DataFrame objects have an append method that can accomplish the same thing in fewer keystrokes.
# In[12]
# NOTE: DataFrame.append is deprecated and was removed in pandas 2.0, so this
# cell only runs on older pandas versions; pd.concat([df1, df2]) is the
# supported equivalent.
display('df1','df2','df1.append(df2)')
# Out[12]
df1
     A	 B
1	A1	B1
2	A2	B2
df2
     A	 B
3	A3	B3
4	A4	B4
df1.append(df2)
     A	 B
1	A1	B1
2	A2	B2
3	A3	B3
4	A4	B4
- Keep in mind that unlike the append and extend methods of Python lists, the append method in Pandas does not modify the original object; instead it creates a new object with the combined data.
- It also is not a very efficient method, because it involves creation of a new index and data buffer.
- Note that the append method is deprecated (it was removed in pandas 2.0). Use pd.concat instead.