Python mode for Processingにおいて、時系列データのハンドリングを整備する。
Pythonには、もっと便利なライブラリーがありそうだが、ここではJAVA ProcessingコードをできるだけそのままPythonへ変換する。
まずは、Tableクラスを更新したFloatTableクラスのPythonへの翻訳
FloatTable.py 修正
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
class FloatTable(object):
    """Tab-separated table of numbers with a header row and a label column.

    Python translation of the Java Processing ``FloatTable`` helper class.
    The first line of the file holds the column names; the first field of
    every following line is the row name and the remaining fields are the
    data cells.  Cells are stored as strings and converted with ``float()``
    when they are read.

    Every ``col`` argument of the public methods is a data-column index:
    ``0`` is the first column after the row name.  This is the convention
    ``getFloat`` and ``getColumnName`` already used; ``isValid`` and the
    min/max methods now follow it as well.
    """

    def __init__(self, filename):
        """Load ``filename`` with Processing's ``loadStrings`` and parse it.

        Raises:
            IOError: if ``loadStrings`` returns ``None`` for the file.
        """
        self.filename = filename
        lines = loadStrings(self.filename)
        if lines is None:
            raise IOError("Could not load table file " + str(filename))
        self._parse(lines)

    def _parse(self, lines):
        """Fill the table from a sequence of text lines (header line first)."""
        self.rowsTable = lines
        self.rowCount = 0
        self.columnCount = 0
        self.data = []        # fields of the most recently parsed row
        self.data2 = []       # one list of string fields per row, name first
        self.rowNames = []
        self.columnNames = []
        self.columns = []
        self.pieces = []
        if len(lines) == 0:
            return

        self.columns = lines[0].split('\t')
        # The first header field sits above the row names, so it is dropped.
        self.columnNames = self.columns[1:]
        self.columnCount = len(self.columnNames)

        # Index loop rather than slicing: `lines` may not be a Python list.
        for i in range(1, len(lines)):
            line = lines[i]
            # Skip blank lines and '#' comment lines.
            if not line.strip() or line.startswith('#'):
                continue
            self.pieces = line.split('\t')
            self.data = list(self.pieces)
            self.data2.append(self.data)
            self.rowNames.append(self.pieces[0])
            self.rowCount += 1

    def _cell(self, row, col):
        """Return the cell as a float, or None if it is missing or not a number."""
        if row < 0 or row >= self.rowCount:
            return None
        fields = self.data2[row]
        # +1 skips the row name stored at index 0.
        if col < 0 or col + 1 >= len(fields):
            return None
        try:
            value = float(fields[col + 1])
        except ValueError:
            return None
        if value != value:  # NaN is treated as "no data"
            return None
        return value

    def _valid_values(self, rows, cols):
        """Collect every valid cell value for the given row and column indices."""
        values = []
        for row in rows:
            for col in cols:
                value = self._cell(row, col)
                if value is not None:
                    values.append(value)
        return values

    def getRowCount(self):
        """Return the number of data rows."""
        return self.rowCount

    def getRowName(self, rowIndex):
        """Return the name (first field) of row ``rowIndex``."""
        return self.rowNames[rowIndex]

    def getRowNames(self):
        """Return the list of all row names, in file order."""
        return self.rowNames

    def getRowIndex(self, name):
        """Return the index of the first row called ``name``, or -1 if none."""
        for i in range(self.rowCount):
            if self.rowNames[i] == name:
                return i
        return -1

    def getColumnCount(self):
        """Return the number of data columns named in the header."""
        return self.columnCount

    def getColumnName(self, colIndex):
        """Return the header name of data column ``colIndex``."""
        return self.columnNames[colIndex]

    def getColumnNames(self):
        """Return the names of all data columns."""
        return self.columnNames

    def getFloat(self, rowIndex, col):
        """Return the value at row ``rowIndex``, data column ``col`` as a float.

        Raises:
            IndexError: if the row or the column does not exist.
            ValueError: if the cell text is not a number.
        """
        if rowIndex < 0 or rowIndex >= self.rowCount:
            raise IndexError("There is no row " + str(rowIndex))
        fields = self.data2[rowIndex]
        if col < 0 or col + 1 >= len(fields):
            raise IndexError("Row " + str(rowIndex) +
                             " does not have a column " + str(col))
        return float(fields[col + 1])

    def isValid(self, row, col):
        """Return True if ``getFloat(row, col)`` would yield a usable number."""
        return self._cell(row, col) is not None

    def getColumnMin(self, col):
        """Return the smallest valid value in data column ``col``.

        Returns ``float('inf')`` when the column has no valid cell.
        """
        values = self._valid_values(range(self.rowCount), [col])
        return min(values) if values else float('inf')

    def getColumnMax(self, col):
        """Return the largest valid value in data column ``col``.

        Returns ``float('-inf')`` when the column has no valid cell.
        """
        values = self._valid_values(range(self.rowCount), [col])
        return max(values) if values else float('-inf')

    def getRowMin(self, row):
        """Return the smallest valid value in ``row``, or ``float('inf')``."""
        values = self._valid_values([row], range(self.columnCount))
        return min(values) if values else float('inf')

    def getRowMax(self, row):
        """Return the largest valid value in ``row``, or ``float('-inf')``."""
        values = self._valid_values([row], range(self.columnCount))
        return max(values) if values else float('-inf')

    def getTableMin(self):
        """Return the smallest valid value in the table, or ``float('inf')``."""
        values = self._valid_values(range(self.rowCount),
                                    range(self.columnCount))
        return min(values) if values else float('inf')

    def getTableMax(self):
        """Return the largest valid value in the table, or ``float('-inf')``."""
        values = self._valid_values(range(self.rowCount),
                                    range(self.columnCount))
        return max(values) if values else float('-inf')
Pythonのコード:setup()関数の中で、
1 |
data = FloatTable.FloatTable("milk-tea-coffee.tsv") |
で、データファイルを読み込み、FloatTableクラスのインスタンスdataに収める。
data一列目の年yearについては、
1 |
years_str = data.getRowNames() |
で、一旦years_str配列に収める。
次に
1 2 |
for i in range(len(years_str)): years.append(int(years_str[i])) |
で整数に変換して、配列yearsに収め直す。
1 |
years.append(int(data.getRowNames())) |
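と一気に変換したいところだが、getRowNames()が返すのは文字列のリストであり、int()はリストをそのまま受け取れないため、TypeErrorになる。一行で書きたい場合は、years = [int(y) for y in data.getRowNames()] のようにリスト内包表記を使えばよい。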
drawDataPoints(col)関数にて、一番目のデータ列(dataの2列目)col=0を指定して、
1 2 3 4 5 6 7 8 9 10 |
rowCount = data.getRowCount() for row in range(rowCount): #if (data.isValid(row, col)): value = data.getFloat(row, col) x = map(years[row], yearMin, yearMax, plotX1, plotX2) y = map(value, dataMin, dataMax, plotY2, plotY1) point(x, y) |
データポイントをプロットする。その際、前もって
1 |
dataMax = data.getTableMax() |
により、データの最大値を取得することで、プロットが描画範囲に収まるようにする。
figure_01_just_point_py.pyde
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import FloatTable

# Canvas size in pixels.  Named so that Processing's built-in `width` and
# `height` are not shadowed.
CANVAS_WIDTH = 720
CANVAS_HEIGHT = 405

# Corners of the plotted time series
plotX1 = 50
plotX2 = CANVAS_WIDTH - plotX1
plotY1 = 60
plotY2 = CANVAS_HEIGHT - plotY1

# Lower end of the value axis.
dataMin = 0


def setup():
    """Create the canvas, load the table and derive the axis ranges."""
    global data, years, yearMin, yearMax, dataMax

    size(CANVAS_WIDTH, CANVAS_HEIGHT)

    data = FloatTable.FloatTable("milk-tea-coffee.tsv")

    # Row names are the years as strings; convert them once for plotting.
    years = [int(name) for name in data.getRowNames()]
    print("years=:" + str(years))
    yearMin = years[0]
    yearMax = years[-1]

    # The table does not change after loading, so its maximum is computed
    # here once instead of on every frame.
    dataMax = data.getTableMax()

    smooth()


def draw():
    """Redraw the plot background and the points of the first data column."""
    background(224)

    # Show the plot area as a white box
    fill(255)
    rectMode(CORNERS)
    noStroke()
    rect(plotX1, plotY1, plotX2, plotY2)

    strokeWeight(5)
    # Draw the data for the first column
    stroke('#5679C1')
    drawDataPoints(0)


def drawDataPoints(col):
    """Plot one point per row for data column ``col``.

    The x position is mapped from the row's year, the y position from the
    cell value; the y axis is flipped so larger values are drawn higher.
    """
    for row in range(data.getRowCount()):
        if data.isValid(row, col):
            value = data.getFloat(row, col)
            x = map(years[row], yearMin, yearMax, plotX1, plotX2)
            y = map(value, dataMin, dataMax, plotY2, plotY1)
            point(x, y)
データファイルの一行目のフィールド名を変数に収めて、
draw()関数の中で、以下のコード
1 2 3 4 |
fill(0) textSize(20) title = data.getColumnName(currentColumn) text(title, plotX1, plotY1 - 10) |
で、図の左上にタイトルとして表示させる。
figure_02_plot_title_py.pyde
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import FloatTable

# Canvas size in pixels.  Named so that Processing's built-in `width` and
# `height` are not shadowed.
CANVAS_WIDTH = 720
CANVAS_HEIGHT = 405

# Corners of the plotted time series
plotX1 = 50
plotX2 = CANVAS_WIDTH - plotX1
plotY1 = 60
plotY2 = CANVAS_HEIGHT - plotY1

# Data column that is plotted and whose name is shown as the title.
currentColumn = 0

# Lower end of the value axis.
dataMin = 0


def setup():
    """Create the canvas, load the table and derive the axis ranges."""
    global data, columnCount, years, yearMin, yearMax, dataMax

    size(CANVAS_WIDTH, CANVAS_HEIGHT)

    data = FloatTable.FloatTable("milk-tea-coffee.tsv")
    # Number of data columns in the table.
    columnCount = data.getColumnCount()

    # Row names are the years as strings; convert them once for plotting.
    years = [int(name) for name in data.getRowNames()]
    print("years=:" + str(years))
    yearMin = years[0]
    yearMax = years[-1]

    # The table does not change after loading, so its maximum is computed
    # here once instead of on every frame.
    dataMax = data.getTableMax()

    smooth()


def draw():
    """Redraw the background, the column title and the data points."""
    background(224)

    # Show the plot area as a white box
    fill(255)
    rectMode(CORNERS)
    noStroke()
    rect(plotX1, plotY1, plotX2, plotY2)

    # Draw the current column's name just above the plot's top-left corner.
    fill(0)
    textSize(20)
    title = data.getColumnName(currentColumn)
    text(title, plotX1, plotY1 - 10)

    strokeWeight(5)
    # Draw the data for the current column
    stroke('#5679C1')
    drawDataPoints(currentColumn)


def drawDataPoints(col):
    """Plot one point per row for data column ``col``.

    Rows without a usable value in that column are skipped.  The x position
    is mapped from the row's year, the y position from the cell value; the
    y axis is flipped so larger values are drawn higher.
    """
    for row in range(data.getRowCount()):
        if data.isValid(row, col):
            value = data.getFloat(row, col)
            x = map(years[row], yearMin, yearMax, plotX1, plotX2)
            y = map(value, dataMin, dataMax, plotY2, plotY1)
            point(x, y)