import csv
import pandas as pd
import numpy as np
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
'''
Find the rows of two CSV tables that share the same sequence value
and collect their row indices.
'''
def print_hi(
    name,
    path1='/home/philtell/Desktop/0514_biogas_p1_asv.csv',
    path2='/home/philtell/Desktop/0514_biogas_p2_asv.csv',
    out_path='/home/philtell/Desktop/mylove.csv',
    key_col=8,
):
    """Merge two CSV tables on a shared key column and write the result.

    Every data row (the first row of each file is treated as a header and
    skipped) of the second table is compared with the data rows of the first
    table on column ``key_col``.  The output is built in three parts:

    1. for each row of table 2 that matches *exactly one* row of table 1,
       the table-2 row followed by its table-1 partner;
    2. every table-1 data row that was not used as such a partner;
    3. every table-2 data row that was not paired in step 1 (no match, or
       more than one match).

    The merged rows are written with ``pandas.DataFrame.to_csv`` (so the
    output carries pandas' default integer index and column header), and a
    few progress values are printed along the way.

    Args:
        name: Value shown in the greeting line.
        path1: Path of the first CSV table (read as ISO-8859-1).
        path2: Path of the second CSV table (read as ISO-8859-1).
        out_path: Path of the CSV file to write (UTF-8).
        key_col: Zero-based, non-negative index of the column to match on.
            Rows too short to contain this column never match and are
            carried over as unmatched rows.

    Returns:
        None.

    Raises:
        OSError: If an input file cannot be opened or the output cannot be
            written.
    """
    print(f'Hi, {name}')

    # Each file is parsed once; both the key column and the full rows are
    # taken from the same in-memory list.
    rows1 = _read_rows(path1)
    rows2 = _read_rows(path2)

    same_index = _match_rows(rows1, rows2, key_col)
    # Number of table-2 rows that have at least one partner in table 1.
    print(sum(1 for entry in same_index if len(entry) > 1))
    print(len(same_index))
    print(same_index)

    merged = _merge_rows(rows1, rows2, same_index)
    print(len(merged))
    print(merged)

    # Column labels come from the header row that was already read, so the
    # file is not parsed a third time under a different encoding.
    labels = list(rows2[0]) if rows2 else []
    print(labels)

    pd.DataFrame(data=merged).to_csv(out_path, encoding='utf-8')


def _read_rows(path):
    """Return every row of the CSV file at *path*, header included."""
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(path, 'r', encoding='ISO-8859-1', newline='') as csvfile:
        return list(csv.reader(csvfile))


def _match_rows(rows1, rows2, key_col):
    """List, per data row of rows2, the rows1 indices sharing its key.

    Returns one ``[i, j1, j2, ...]`` list for every data row index ``i`` of
    ``rows2`` (in order), where the ``j`` values are the ascending data row
    indices of ``rows1`` whose ``key_col`` value equals that of row ``i``.
    """
    # Index table 1 once so each table-2 row is resolved with one lookup.
    index_by_key = {}
    for j, row in enumerate(rows1[1:], start=1):
        if len(row) > key_col:
            index_by_key.setdefault(row[key_col], []).append(j)

    same_index = []
    for i, row in enumerate(rows2[1:], start=1):
        partners = index_by_key.get(row[key_col], []) if len(row) > key_col else []
        same_index.append([i, *partners])
    return same_index


def _merge_rows(rows1, rows2, same_index):
    """Build the output rows: unique pairs first, then the leftovers."""
    merged = []
    paired1 = set()  # table-1 row indices consumed by a one-to-one pair
    paired2 = set()  # table-2 row indices consumed by a one-to-one pair
    for entry in same_index:
        # Only rows with exactly one partner are paired; ambiguous or
        # unmatched rows fall through to the leftover sections below.
        if len(entry) != 2:
            continue
        i, j = entry
        paired2.add(i)
        paired1.add(j)
        merged.append(rows2[i])
        merged.append(rows1[j])

    merged.extend(
        row for j, row in enumerate(rows1[1:], start=1) if j not in paired1
    )
    merged.extend(
        row for i, row in enumerate(rows2[1:], start=1) if i not in paired2
    )
    return merged
# Script entry point: run the merge only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    print_hi("PyCharm")
# See PyCharm help at https://www.jetbrains.com/help/pycharm/
# (Web-page residue, not part of the script: "No comments yet - be the first to leave one...")