import pandas as pd
import numpy as np
## Build a 5x3 demo frame of random integers drawn from [10**5, 10**7),
## with columns 'a', 'b', 'c' stored as int64.
df = pd.DataFrame(
    data=np.random.randint(low=10**5, high=10**7, size=(5, 3)),
    columns=['a', 'b', 'c'],
    dtype=np.int64,
)
df
|
a |
b |
c |
0 |
2368596 |
282593 |
7649457 |
1 |
6486779 |
5348256 |
790672 |
2 |
8468404 |
4682970 |
2904873 |
3 |
2271514 |
2908642 |
9272301 |
4 |
7811256 |
3652968 |
6715015 |
## Inspect the per-column dtypes; all three columns are int64 at this point.
df.dtypes
a int64
b int64
c int64
dtype: object
## Cast column 'a' to 64-bit float. astype succeeds here because every
## value in the column is already a valid number.
df['a'] = df['a'].astype(np.float64)
df.dtypes
a float64
b int64
c int64
dtype: object
## When a column holds only valid numbers, use `astype` to convert it from one numeric dtype to another.
## But what if the column contains values that are not valid numbers?
## Put a non-numeric string into column 'b' to simulate dirty data.
## The column is cast to object first: writing a str straight into an int64
## column relies on silent upcasting, which pandas >= 2.1 deprecates (PDEP-6)
## and plans to turn into an error. On older pandas the result is the same.
df['b'] = df['b'].astype(object)
df.loc[1, 'b'] = 'xxxxx'
df
|
a |
b |
c |
0 |
2368596.0 |
282593 |
7649457 |
1 |
6486779.0 |
xxxxx |
790672 |
2 |
8468404.0 |
4682970 |
2904873 |
3 |
2271514.0 |
2908642 |
9272301 |
4 |
7811256.0 |
3652968 |
6715015 |
## Intentionally failing example: column 'b' now contains the string 'xxxxx',
## so astype(float) raises ValueError (see the traceback that follows).
df['b'] = df['b'].astype(float)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-11-06b413f6e48a> in <module>
----> 1 df['b'] = df['b'].astype(float)
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
727 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
728 # Explicit copy, or required since NumPy can't view from / to object.
--> 729 return arr.astype(dtype, copy=True)
730
731 return arr.view(dtype)
ValueError: could not convert string to float: 'xxxxx'
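## In this situation, use `pd.to_numeric` with errors='coerce': values that cannot be parsed as numbers become NaN instead of raising.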
## Convert column 'b' to a numeric dtype; errors='coerce' replaces any value
## that cannot be parsed (here 'xxxxx') with NaN rather than raising.
df['b'] = df['b'].pipe(pd.to_numeric, errors='coerce')
df
|
a |
b |
c |
0 |
2368596.0 |
282593.0 |
7649457 |
1 |
6486779.0 |
NaN |
790672 |
2 |
8468404.0 |
4682970.0 |
2904873 |
3 |
2271514.0 |
2908642.0 |
9272301 |
4 |
7811256.0 |
3652968.0 |
6715015 |
## Re-check the dtypes: after the coercion, column 'b' is reported as float64.
df.dtypes
a float64
b float64
c int64
dtype: object