数据挖掘 数据导入

前提准备

import pandas
# pandas offers two core containers:
#   Series    - a single row/column of values, displayed as "index  value" pairs
#   DataFrame - a data frame, i.e. a table-like two-dimensional structure

# Series built from a plain list; no index is given, so pandas numbers the rows.
a = pandas.Series(data=[8, 9, 2, 1])
print(a)

# Series whose four values are labelled with explicit string indexes.
b = pandas.Series(
    data=[10, 6, 2, 6],
    index=["one", "two", "three", "four"],
)
print(b)

# DataFrame built row by row, with the column labels supplied separately.
c = pandas.DataFrame(
    data=[
        [5, 6, 3, 2],
        [23, 12, 3, 4],
        [12, 3, 4, 6],
    ],
    columns=["I", "II", "III", "IIII"],
)
print(c)

# DataFrame built column by column from a mapping. The list-valued columns
# must all have the same length; the scalar under "one" is repeated to fill
# every row.
d = pandas.DataFrame(
    data={
        "one": 1,
        "two": [1, 5, 6],
        "three": ["s", "w", "e"],
    }
)
print(d)

# Quick inspection helpers on the row-built frame.
print(c.head(n=2))      # leading rows (five when n is omitted)
print(c.tail(n=2))      # trailing rows (five when n is omitted)
print(c.describe())     # summary statistics for each column
print(c.transpose())    # rows and columns swapped (same result as c.T)
"D:\Python 3.6.2\python.exe" E:/PyPro/DM/array_.py
['1' '12' 'sex' 'True' 'False']
<class 'numpy.ndarray'>
False
<class 'numpy.str_'>
[list(['3', 12, 'sex', True]) list(['2', 'SEX', False])
 list([12, 26, 5, 48, 7])]
<class 'numpy.ndarray'>
True
<class 'bool'>
[ 3 12 23 45 67]
67
[12 23]
<class 'numpy.ndarray'>
0    8
1    9
2    2
3    1
dtype: int64
one      10
two       6
three     2
four      6
dtype: int64
    I  II  III  IIII
0   5   6    3     2
1  23  12    3     4
2  12   3    4     6
   one three  two
0    1     s    1
1    1     w    5
2    1     e    6
    I  II  III  IIII
0   5   6    3     2
1  23  12    3     4
    I  II  III  IIII
1  23  12    3     4
2  12   3    4     6
               I         II       III  IIII
count   3.000000   3.000000  3.000000   3.0
mean   13.333333   7.000000  3.333333   4.0
std     9.073772   4.582576  0.577350   2.0
min     5.000000   3.000000  3.000000   2.0
25%     8.500000   4.500000  3.000000   3.0
50%    12.000000   6.000000  3.000000   4.0
75%    17.500000   9.000000  3.500000   5.0
max    23.000000  12.000000  4.000000   6.0
      0   1   2
I     5  23  12
II    6  12   3
III   3   3   4
IIII  2   4   6

Process finished with exit code 0

数据导入

import pandas
import pymysql  # used only by the disabled MySQL example further down
import bs4  # never referenced directly; presumably imported so a missing read_html dependency fails early - TODO confirm

# --- Section 1: load data from a file -------------------------------------
# The bare string below is a no-op section banner kept from the original script.
"""
文件导入数据
"""
# Disabled alternatives for other file formats:
# data = pandas.read_csv("tb_record.csv")    # load from a CSV file
# data = pandas.read_excel("tb_record.xls")  # load from an .xls workbook
# Active loader: delimited text file. A tab is already read_table's default
# separator; it is written out here only to make the expected format explicit.
data = pandas.read_table("table.txt", sep="\t")

# --- Section 2: load data from a database (disabled) ----------------------
"""
数据库导入数据
"""
# Supply the real credential at run time; never commit it to the source file.
# link = pymysql.connect('localhost', 'root', '<password>', 'test', charset='utf8')
# sql = "select * from tb_record"
# data = pandas.read_sql(sql, link)

# --- Section 3: load tables from a web page (disabled) --------------------
# The banner notes that read_html depends on bs4 and html5lib, and that a
# crawler should respect the target site's robots protocol.
"""
网页读取表格,导入数据,bs4、html5lib依赖
注意爬虫的reboots协议
"""
# data = pandas.read_html("http://www.runoob.com/html/html-tables.html")

# Show whatever was loaded by the active loader above.
print(data)
# Optional follow-ups, left disabled:
# print(data.describe())
# print(data.sort_values(by = "Tid"))

特别注意依赖关系:使用 pandas.read_html 读取网页表格时,需要先安装 bs4、html5lib 等依赖库。

猜你喜欢

转载自my.oschina.net/gain/blog/1796830