目录

Apache Arrow 基础

目录

Vector 代码片段

CSS:

1
2
3
/* Gray text for the element with id "result". */
#result {
  color: #808080;
}

HTML:

1
2
3
<!-- NOTE(review): the package spec in this URL was mangled to "[email protected]" by e-mail
     obfuscation during extraction. "apache-arrow@9.0.0" is an assumption; confirm the
     version against the original page. -->
<script src="https://cdn.jsdelivr.net/npm/apache-arrow@9.0.0/Arrow.es2015.min.js"></script>
<script src="/hello.js"></script>
<div id="result"></div>

JS:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
// Take the two entry points this demo needs from the global `Arrow` object.
const { makeVector, RecordBatch } = Arrow;

// Wrap a typed array in an Arrow vector.
const rawInts = new Int32Array([1, 2, 3, 4, 5, 6, 7]);
const sourceVector = makeVector(rawInts);

// Build a RecordBatch with a single column called "name" from the first entry
// of the vector's `data` array, then read that column back out of the batch.
const batch = new RecordBatch({ name: sourceVector.data[0] });
const column = batch.getChildAt(0);

// NOTE(review): log() is not defined in this snippet; presumably it is supplied
// by the page (e.g. /hello.js) — confirm.
log("表格第一行数据:" + column.get(0));
log("列名称:" + batch.schema.fields[0].name);
运行结果:

Dictionary 代码片段

CSS:

1
2
3
/* Gray text for the element with id "result". */
#result {
  color: #808080;
}

HTML:

1
2
<!-- NOTE(review): the package spec in this URL was mangled to "[email protected]" by e-mail
     obfuscation during extraction. "apache-arrow@9.0.0" is an assumption; confirm the
     version against the original page. -->
<script src="https://cdn.jsdelivr.net/npm/apache-arrow@9.0.0/Arrow.es2015.min.js"></script>
<!-- NOTE(review): the accompanying script calls log(), which nothing on this page
     defines; confirm where it is expected to come from. -->
<div id="result"></div>

JS:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
// Demo: build a dictionary-encoded vector by hand and look up the position of
// one of its values.
const { makeVector, vectorFromArray, Dictionary, Utf8, Int32 } = Arrow;

// The distinct string values, held in a Utf8 vector that serves as the dictionary.
const dictionary = ['foo', 'bar', 'baz'];
const dictionaryVec = vectorFromArray(dictionary, new Utf8).memoize();

// One index per dictionary entry, i.e. [0, 1, 2].
const indices = Array.from({ length: dictionary.length }, (_, i) => i);

// `data` holds indexes into `dictionary`; the Dictionary type pairs the
// dictionary's value type with an Int32 index type.
const vector = makeVector({
  data: indices,
  dictionary: dictionaryVec,
  type: new Dictionary(dictionaryVec.type, new Int32),
});

// NOTE(review): log() is not defined in this snippet; presumably supplied by the page — confirm.
log("baz的索引:" + vector.indexOf("baz"));
运行结果:

Struct 代码片段

CSS:

1
2
3
/* Gray text for the element with id "result". */
#result {
  color: #808080;
}

HTML:

1
2
<!-- NOTE(review): the package spec in this URL was mangled to "[email protected]" by e-mail
     obfuscation during extraction. "apache-arrow@9.0.0" is an assumption; confirm the
     version against the original page. -->
<script src="https://cdn.jsdelivr.net/npm/apache-arrow@9.0.0/Arrow.es2015.min.js"></script>
<!-- NOTE(review): the accompanying script calls log(), which nothing on this page
     defines; confirm where it is expected to come from. -->
<div id="result"></div>

JS:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
// Names are taken from the global `Arrow` object; only vectorFromArray is used below.
const { Schema, Field, Float32, Struct, Utf8, Vector, vectorFromArray } = Arrow;

// Two source rows; the second one carries an extra `city` key.
const properties = [
  { name: "jacob", age: 22 },
  { name: "Ben", age: 33, city: "SF" },
];

// A single extra row that is appended afterwards.
const newProperties = [{ name: "sean", age: 31 }];

// Turn each array of plain objects into an Arrow vector, then join the two.
const baseRows = vectorFromArray(properties);
const extraRows = vectorFromArray(newProperties);
const allRows = baseRows.concat(extraRows);

// NOTE(review): log() is not defined in this snippet; presumably supplied by the page — confirm.
log("全部的Vector:<br/>" + baseRows);
log("第一行数据的名字:<br/>" + baseRows.get(0).name);
log("追加后的数据:<br/>" + allRows);
运行结果:

Schema 代码片段

CSS:

1
2
3
/* Gray text for the element with id "result". */
#result {
  color: #808080;
}

HTML:

1
2
<!-- NOTE(review): the package spec in this URL was mangled to "[email protected]" by e-mail
     obfuscation during extraction. "apache-arrow@9.0.0" is an assumption; confirm the
     version against the original page. -->
<script src="https://cdn.jsdelivr.net/npm/apache-arrow@9.0.0/Arrow.es2015.min.js"></script>
<!-- NOTE(review): the accompanying script calls log(), which nothing on this page
     defines; confirm where it is expected to come from. -->
<div id="result"></div>

JS:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// Demo: describe three columns with an explicit Schema, assemble a RecordBatch
// from pre-built child vectors, wrap it in a Table and read nested values back.
const {
  Table,
  Schema,
  RecordBatch,
  Field,
  Int32,
  Struct,
  Utf8,
  makeData,
  vectorFromArray,
  List,
} = Arrow;

// Column "id": one number per row.
const values = [1, 2];
const vector = vectorFromArray(values);

// Column "nodes": one nested object per row.
const nodes = [
  {
    id: "1",
    color: 0xfff000,
    label: "Movie",
    position: { x: 1, y: 2, z: 3 },
    properties: {
      name: "jacob",
      age: 22,
    },
  },
  {
    id: "2",
    color: 0xfff000,
    label: "Movie",
    position: { x: 1, y: 2, z: 3 },
    properties: {
      name: "test",
      age: 22,
    },
  },
];
const nodesVector = vectorFromArray(nodes);

// Column "label": one list of strings per row, built with an explicit List<Utf8> type.
const labels = [["User", "Person"], ["3", "4", "5"]];
const labelVector = vectorFromArray(labels, new List(new Field("label", new Utf8)));

// The fields are listed in the same order as the `children` handed to makeData below.
// NOTE(review): "id" is declared as Int32, while `vector` gets whatever type
// vectorFromArray infers from plain numbers — confirm that the two agree.
const fields = [
  Field.new("id", new Int32()),
  Field.new("label", new List(new Field("label", new Utf8))),
  Field.new("nodes", new Struct()),
];
const schema = new Schema(fields);
const data = makeData({
  type: new Struct(schema.fields),
  children: [vector, labelVector, nodesVector],
});
const recordBatch = new RecordBatch(schema, data);
const graphTable = new Table([recordBatch]);

// NOTE(review): log() is not defined in this snippet; presumably supplied by the page — confirm.
log("label:" + graphTable.getChild("nodes").get(1).label);
const labels2 = graphTable.getChild("label").get(0);
log("List:" + labels2);
运行结果:

Table 代码片段

CSS:

1
2
3
/* Gray text for the element with id "result". */
#result {
  color: #808080;
}

HTML:

1
2
<!-- NOTE(review): the package spec in this URL was mangled to "[email protected]" by e-mail
     obfuscation during extraction. "apache-arrow@9.0.0" is an assumption; confirm the
     version against the original page. -->
<script src="https://cdn.jsdelivr.net/npm/apache-arrow@9.0.0/Arrow.es2015.min.js"></script>
<!-- NOTE(review): the accompanying script calls log(), which nothing on this page
     defines; confirm where it is expected to come from. -->
<div id="result"></div>

JS:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Demo: build two Tables from RecordBatches, concatenate them, and overwrite a
// nested value through a row of the concatenated table.
const { Table, RecordBatch, vectorFromArray } = Arrow;

/**
 * Builds one sample node record. Every node in this demo shares the same
 * color, label, position and age; only the id and the nested name differ.
 * @param {string} id - Node identifier.
 * @param {string} name - Value stored under `properties.name`.
 * @returns {object} A plain object describing the node.
 */
const makeNode = (id, name) => ({
  id,
  color: 0xfff000,
  label: "Movie",
  position: { x: 1, y: 2, z: 3 },
  properties: {
    name,
    age: 22,
  },
});

// First table: columns "id" and "nodes", two rows.
const values = [1, 2];
const vector = vectorFromArray(values);
const nodes = [makeNode("1", "jacob"), makeNode("2", "test")];
const nodesVector = vectorFromArray(nodes);

const batch = new RecordBatch({ id: vector, nodes: nodesVector });
const graphTable = new Table([batch]);
const ns = graphTable.getChild("nodes");
// NOTE(review): log() is not defined in this snippet; presumably supplied by the page — confirm.
log("第一行数据的label:" + ns.get(0).label);

// Second table with the same two columns and two more rows.
const newId = [3, 4];
const newNodes = [makeNode("3", "Rob"), makeNode("4", "Susana")];
const newIdVec = vectorFromArray(newId);
const newNodeVec = vectorFromArray(newNodes);
const newBatch = new RecordBatch({ id: newIdVec, nodes: newNodeVec });
const newTable = new Table([newBatch]);
const newNodesVec = newTable.getChild("nodes");
log("新Table的property:" + newNodesVec.get(0).properties.name);

// Concatenate: rows 0-1 come from graphTable, rows 2-3 from newTable.
const concatTable = graphTable.concat(newTable);
log("concat table's last row properties:" + concatTable.getChild("nodes").get(3).properties.name);

// Change a property's value: write through the row object, assign the row's
// `properties` back, then re-read the column to show the stored value.
const jacobProperty = concatTable.getChild("nodes").get(0).properties;
jacobProperty.name = "S";
concatTable.getChild("nodes").get(0).properties = jacobProperty;

log("rename:" + concatTable.getChild("nodes").get(0).properties.name);
运行结果:

引用

字符串的存储格式

Apache Arrow 物理内存布局

Apache Arrow Graph Schema