读取表格数据,按照固定列排序-蒲公英云

读取表格数据,按照固定列排序

import csv
import pandas as pd
import numpy as np
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
'''
Find the sequences shared by both tables and collect their row indexes.
'''
# Default locations used when print_hi() is called with only ``name``.
_DEFAULT_FIRST_CSV = '/home/philtell/Desktop/0514_biogas_p1_asv.csv'
_DEFAULT_SECOND_CSV = '/home/philtell/Desktop/0514_biogas_p2_asv.csv'
_DEFAULT_OUTPUT_CSV = '/home/philtell/Desktop/mylove.csv'


def _read_rows(path, encoding):
    """Return every row of the CSV at *path* (header row included)."""
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, 'r', encoding=encoding, newline='') as csvfile:
        return list(csv.reader(csvfile))


def _match_rows(keys1, keys2):
    """Return ``[i, j1, j2, ...]`` for each data row ``i`` of table 2.

    ``j1, j2, ...`` are the data-row indexes of table 1 (ascending) whose
    key equals the key of row ``i``. Index 0 (the header) is skipped in
    both tables.
    """
    positions = {}
    for j in range(1, len(keys1)):
        positions.setdefault(keys1[j], []).append(j)
    return [[i] + positions.get(keys2[i], []) for i in range(1, len(keys2))]


def _merge_rows(rows1, rows2, matches):
    """Interleave uniquely matched rows, then append the remaining rows."""
    merged = []
    paired1 = set()
    paired2 = set()
    for match in matches:
        # Only a table-2 row with exactly one table-1 partner is paired;
        # rows with zero or several partners fall through to the tail.
        if len(match) != 2:
            continue
        i, j = match
        paired2.add(i)
        merged.append(rows2[i])
        paired1.add(j)
        merged.append(rows1[j])
    merged.extend(rows1[j] for j in range(1, len(rows1)) if j not in paired1)
    merged.extend(rows2[i] for i in range(1, len(rows2)) if i not in paired2)
    return merged


def print_hi(name,
             first_csv=_DEFAULT_FIRST_CSV,
             second_csv=_DEFAULT_SECOND_CSV,
             output_csv=_DEFAULT_OUTPUT_CSV,
             key_column=8,
             encoding="ISO-8859-1"):
    """Merge two CSV tables so that rows sharing a key end up adjacent.

    Every data row of the second table is compared with the data rows of
    the first table on column ``key_column``. When a second-table row has
    exactly one partner in the first table, the two rows are written next
    to each other (second-table row first). All data rows that were not
    paired follow afterwards: first those of table 1, then those of
    table 2, each in file order. The header rows are not copied.

    The result is written to ``output_csv`` through ``pandas.DataFrame``,
    so the file gets a numeric header and a leading index column.
    Progress information is printed to stdout.

    Args:
        name: Name shown in the greeting line.
        first_csv: Path of the first table.
        second_csv: Path of the second table.
        output_csv: Path of the CSV file to write.
        key_column: Zero-based index of the column holding the key.
        encoding: Text encoding used to read both input files.

    Raises:
        IndexError: If a row has no column ``key_column``.
        OSError: If an input file cannot be opened.
    """
    print(f'Hi, {name}')

    # Each file is parsed once; the key column is derived from the rows.
    rows1 = _read_rows(first_csv, encoding)
    keys1 = [row[key_column] for row in rows1]
    rows2 = _read_rows(second_csv, encoding)
    keys2 = [row[key_column] for row in rows2]

    same_index = _match_rows(keys1, keys2)
    # Number of table-2 rows that have at least one partner in table 1.
    print(sum(1 for match in same_index if len(match) > 1))
    print(len(same_index))
    print(same_index)

    merged = _merge_rows(rows1, rows2, same_index)
    print(len(merged))

    # Only the column labels are needed, so no data rows are parsed; the
    # encoding matches the csv reads above so both steps agree on the file.
    header = pd.read_csv(second_csv, nrows=0, encoding=encoding)
    labels = list(header.columns.values)

    print(merged)
    print(labels)
    pd.DataFrame(data=merged).to_csv(output_csv, encoding='utf-8')
# Press the green button in the gutter to run the script.
# Script entry point: run the merge only when executed directly, not on import.
if __name__ == "__main__":
    print_hi(name="PyCharm")
# See PyCharm help at https://www.jetbrains.com/help/pycharm/