Python basics (NumPy, pandas, Matplotlib)

NumPy library:
To create an ndarray, simply call NumPy's array function:
numpy.array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0)
Name    Description
object  an array or a nested sequence
dtype   data type of the array elements, optional
copy    whether the object needs to be copied, optional
order   memory layout of the created array: C for row-major, F for column-major, A for any order (default)
subok   by default, the returned array is forced to be a base-class array
ndmin   the minimum number of dimensions of the resulting array

NumPy data types:
Name    Description
bool_   Boolean data type (True or False)
int_    default integer type (similar to C's long; int32 or int64)
intc    same as C's int type, usually int32 or int64
intp    integer type used for indexing (similar to C's ssize_t; normally still int32 or int64)
int8    byte (-128 to 127)
int16   integer (-32768 to 32767)
int32   integer (-2147483648 to 2147483647)
int64   integer (-9223372036854775808 to 9223372036854775807)
uint8   unsigned integer (0 to 255)
uint16  unsigned integer (0 to 65535)
uint32  unsigned integer (0 to 4294967295)
uint64  unsigned integer (0 to 18446744073709551615)
float_  shorthand for float64
float16 half-precision floating-point number: 1 sign bit, 5 exponent bits, 10 mantissa bits
float32 single-precision floating-point number: 1 sign bit, 8 exponent bits, 23 mantissa bits
float64 double-precision floating-point number: 1 sign bit, 11 exponent bits, 52 mantissa bits
complex_    shorthand for complex128, i.e. a 128-bit complex number
complex64   complex number represented by two 32-bit floats (real part and imaginary part)
complex128  complex number represented by two 64-bit floats (real part and imaginary part)
NumPy's numeric types are actually instances of dtype objects, each corresponding to a unique character code; they include np.bool_, np.int32, np.float32, etc.
Constructing arrays filled with initial values:
X = np.zeros((2, 3), dtype=int)
numpy.ones(shape, dtype=None, order='C')

Pandas

1.1 Append a new column at the end of a DataFrame

import os

import pandas as pd

# Location of creditcard.csv. The raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\P".
path = os.path.join(r"D:\Python 基础\Test\逻辑回归-信用卡欺诈检测", "creditcard.csv")
data = pd.read_csv(path)
data['A'] = None  # append a new, empty column 'A' at the end of the DataFrame
print(data)

Here Insert Picture Description
1.2 Use pd.concat to append two new columns at the end of a DataFrame; the drawback of this approach is that the insert position cannot be specified

import os

import pandas as pd

# Location of creditcard.csv. The raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\P".
path = os.path.join(r"D:\Python 基础\Test\逻辑回归-信用卡欺诈检测", "creditcard.csv")
data = pd.read_csv(path)
# Concatenating with an empty frame that only declares columns 'A' and 'B'
# yields the original rows plus those two (empty) columns at the end.
print(pd.concat([data, pd.DataFrame(columns=list('AB'))]))

Here Insert Picture Description
1.3 Use list.insert(index, obj) on the column list to insert a column at a specified position

import os

import pandas as pd

# Location of creditcard.csv. The raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\P".
path = os.path.join(r"D:\Python 基础\Test\逻辑回归-信用卡欺诈检测", "creditcard.csv")
data = pd.read_csv(path)
col_name = data.columns.tolist()
col_name.insert(1, 'A')  # new column name 'A' goes to position 1
# Rebuilding the frame with the extended column list creates the empty 'A'
# column at the requested position.
df = pd.DataFrame(data, columns=col_name)

1.4 Insert column B in front of column A

import os

import pandas as pd

# Location of creditcard.csv. The raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\P".
path = os.path.join(r"D:\Python 基础\Test\逻辑回归-信用卡欺诈检测", "creditcard.csv")
data = pd.read_csv(path)
col_name = data.columns.tolist()
col_name.insert(1, 'A')  # new column name 'A' goes to position 1
df = pd.DataFrame(data, columns=col_name)
col_name = df.columns.tolist()
col_name.insert(col_name.index('A'), 'B')  # insert 'B' directly in front of 'A'
print(df.reindex(columns=col_name))

1.5 Insert a constant-valued column at a specified position

import os

import pandas as pd

# Location of creditcard.csv. The raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\P".
path = os.path.join(r"D:\Python 基础\Test\逻辑回归-信用卡欺诈检测", "creditcard.csv")
data = pd.read_csv(path)
data.insert(0, "Ones", 1)  # in place: new first column "Ones", every row set to 1
print(data)

Here Insert Picture Description
1.6 Remove Columns

import os

import pandas as pd

# Location of creditcard.csv. The raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences such as "\P".
path = os.path.join(r"D:\Python 基础\Test\逻辑回归-信用卡欺诈检测", "creditcard.csv")
data = pd.read_csv(path)
# drop() returns a new frame without 'V1' and 'V2'; `data` itself is unchanged.
print(data.drop(columns=['V1', 'V2']))

1.7 Count how many times each distinct value appears in a column

# Count how many times each distinct value occurs in the 'Classnew' column.
class_number = data.loc[:, 'Classnew'].value_counts()
# The same counts, re-ordered by the value itself instead of by frequency.
# Series.value_counts is used because the top-level pd.value_counts helper
# is deprecated.
count_classes = data['Classnew'].value_counts(sort=True).sort_index()

Line / curve plot

# Line / curve plot
# Imported here so the snippet runs on its own; the names were previously
# used before any import in this file.
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-1, 1, 20)  # 20 evenly spaced points between -1 and 1
y1 = 2 * x + 1
y2 = x ** 2 + 1
plt.figure(figsize=(5, 5))  # create a new 5 x 5 figure
plt.plot(x, y1, c="red", label="y1=2*x+1", linewidth=1.0, linestyle='-.')  # line width and line style
plt.plot(x, y2, c="blue", label="y2=x**2+1", linewidth=1.0, linestyle='--')
font = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15}  # font used for labels, title and legend
plt.xlabel("x", font)
plt.ylabel("y", font)
plt.title("The value of y", font)
plt.xlim((-0.5, 0.5))  # x-axis range
plt.ylim((-1, 3))  # y-axis range
plt.xticks(np.linspace(-0.5, 0.5, 5))  # redefine the tick values shown on the x axis
plt.yticks([0, 0.5, 0.75, 1.5, 2.0, 2.5])  # redefine the tick values shown on the y axis
axy = plt.gca()  # get the current axes
axy.spines['right'].set_color('none')  # hide the right border
axy.spines['top'].set_color('none')  # hide the top border
axy.xaxis.set_ticks_position('bottom')  # put x tick labels at the bottom (also: top/both/default/none)
axy.spines['bottom'].set_position(('data', 0))  # move the bottom border to y=0 (position types: outward/axes/data)
axy.yaxis.set_ticks_position('left')  # put y tick labels on the left
axy.spines['left'].set_position(('data', 0))  # move the left border to x=0
# Draw a point with a vertical guide line and annotate the point
x0 = 0.4
y0 = 2 * x0 + 1

plt.plot([x0, x0], [0, y0], 'k--', linewidth=2.5)  # dashed line perpendicular to the x axis
plt.scatter([x0], [y0], s=50, color='b')  # the point itself; s is the marker size
plt.annotate(r'$x*2+1=%s$' % y0,     # annotation text
             xy=(x0, y0),           # the data point being annotated
             xycoords='data',       # xy is given in data coordinates
             xytext=(+20, -30),     # where the text goes (relative offset)
             textcoords='offset points',   # xytext is an offset from xy, in points
             fontsize=12,           # font size
             arrowprops=dict(arrowstyle='->', connectionstyle="arc3, rad=.3"))  # curved arrow; rad sets the curvature

# Add a free-form text note; y=-1.8 lies below the lower y-limit set above.
plt.text(0.4, -1.8, r'$Text:\ \mu\ \sigma_i\ \alpha_t.\ written\ by\ likejiao.$')
plt.legend(loc='best', prop=font)
plt.show()

Here Insert Picture Description

Bar chart

# Bar chart
import matplotlib.pyplot as plt
import numpy as np 

n = 12  # number of bars
X = np.arange(n)  # integers 0 .. n-1
# Print one sample of n values drawn uniformly from the 0.5-1.0 interval
# (a separate draw from the ones used for Y1 and Y2 below).
print(np.random.uniform(0.5, 1.0, n))
Y1 = (1 - X / float(n)) * np.random.uniform(0.5, 1.0, n)  # Y1 and Y2 are both random data
Y2 = (1 - X / float(n)) * np.random.uniform(0.5, 1.0, n)

# Draw each series exactly once, already coloured: facecolor is the fill,
# edgecolor the outline. (Previously plain bars were drawn first and then
# covered by an identical coloured set.)
plt.bar(X, +Y1, facecolor='#9999ff', edgecolor='white')  # above the x axis
plt.bar(X, -Y2, facecolor='#ff9999', edgecolor='white')  # below the x axis

# Axis limits and ticks; the y ticks are hidden.
plt.xlim(-.5, n)
plt.xticks(np.linspace(-.5, n, 12))
plt.ylim(-1.25, 1.25)
plt.yticks(())

# Value labels. Bars are centred on their x value by default, so the label
# is centred on x as well (an extra +0.4 would put it on the bar's edge).
for x, y in zip(X, Y1):
    # ha: horizontal alignment
    # va: vertical alignment
    plt.text(x, y + 0.05, '%.2f' % y, ha='center', va='bottom')

for x, y in zip(X, Y2):
    plt.text(x, -y - 0.05, '%.2f' % y, ha='center', va='top')
plt.show()

Here Insert Picture Description

Scatter plot

# Scatter plot
import matplotlib.pyplot as plt
import numpy as np 

# Size of the data set. X and Y are each drawn from a normal distribution
# with mean 0 and standard deviation 1, giving n two-dimensional points.
n = 1024
X = np.random.normal(loc=0, scale=1, size=n)  # x value of every point
Y = np.random.normal(loc=0, scale=1, size=n)  # y value of every point
T = np.arctan2(Y, X)  # per-point value used for colouring

# s: marker size, c: colour values, alpha: 50% transparency
plt.scatter(X, Y, s=75, c=T, alpha=0.5)

# Restrict the view to [-1.5, 1.5] on both axes.
plt.xlim(left=-1.5, right=1.5)
plt.ylim(bottom=-1.5, top=1.5)
# To hide the axis ticks, uncomment:
# plt.xticks(())
# plt.yticks(())

# Finally, display the figure
plt.show()

Here Insert Picture Description

3D plot

# Draw a 3D surface
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- kept: registers the '3d' projection on older Matplotlib
import numpy as np

# Create a figure window and add 3D axes to it.
# add_subplot(projection='3d') is used instead of Axes3D(fig): on current
# Matplotlib, Axes3D(fig) no longer attaches the axes to the figure, which
# leaves the window empty.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Assign X and Y values
X = np.arange(-4, 4, 0.25)
Y = np.arange(-4, 4, 0.25)
X, Y = np.meshgrid(X, Y)    # grid over the x-y plane
R = np.sqrt(X ** 2 + Y ** 2)
# Compute the height values
Z = np.sin(R)
# Z = (1 - X / 2 + X**5 + Y**3) * np.exp(-X**2 -Y**2)  # contour function from drawcontours.py

# Draw the 3D surface coloured with the 'rainbow' colormap; afterwards the
# surface is projected onto the XY plane as a filled contour plot.
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))  # rstride: row stride, cstride: column stride

# Add the filled contours on the XY plane
ax.contourf(X, Y, Z, zdir='z', offset=-2, cmap=plt.get_cmap('rainbow'))  # zdir: project along the z axis; offset: z position of the projection

# Set the z-axis range so the contour plane at z=-2 is visible
ax.set_zlim(-2, 2)

# Finally, display the figure
plt.show()

Here Insert Picture Description

Released eight original articles · won praise 0 · Views 300

Guess you like

Origin blog.csdn.net/qq_41627642/article/details/104515242