Scanning over a Store

The Scan family of APIs is very similar to List, but it allows you to list Items across your entire Store. This can be useful in scenarios such as:

Migrations and backfills that need to operate on every Item
Custom exporters to other datastores
Auditing/validation workflows
Deleting unwanted data
Building global aggregations (e.g. computing the top X blog posts by comments, or counting the number of items meeting some criteria)

Be warned that these operations can be slow and expensive, especially on large Stores. You should use them sparingly and consider using List instead if you can. The results of a Scan operation are not guaranteed to be in any particular order, and Items with multiple key paths will only be returned once, with their primary key path.

Beginning a Scan

Just like for a List operation, you begin by calling BeginScan with your desired parameters. Then you can continue to retrieve more Items by calling ContinueScan with the token returned by BeginScan.

For this example we’ll use the schema defined in Example: Movies Schema, which defines these key paths (among others):

Item Type	Key Path Template
Movie	`/movie-:id`
Actor	`/actor-:id`


17 collapsed lines
1
package main
2

3
import (
4
  "context"
5
  "fmt"
6
  "os"
7
  "slices"
8
  "strconv"
9
  "time"
10

11
  "github.com/google/uuid"
12

13
  "github.com/StatelyCloud/go-sdk/stately"
14
  // This is the code you generated from schema
15
  "github.com/StatelyCloud/stately/go-sdk-sample/schema"
16
)
17

18
func sampleScan(
19
  ctx context.Context,
20
  client stately.Client,
21
) (*stately.ListToken, error) {
22
  iter, err := client.BeginScan(
23
    ctx,
24
    stately.ScanOptions{ItemTypes: []string{"Movie", "Actor"}}, // only Movie and Actor items are requested
25
  )
26
  if err != nil {
27
    return nil, err
28
  }
29

30
  for iter.Next() {
31
    item := iter.Value()
32
    switch v := item.(type) { // no default case: any other item type is silently skipped
33
    case *schema.Movie:
34
      fmt.Printf("Movie Title: %s\n", v.GetTitle())
35
    case *schema.Actor:
36
      fmt.Printf("Actor Name: %s\n", v.GetName())
37
    }
38
  }
39
  // Once the iterator is exhausted, Token() hands back the list token
40
  // (or an error) that can be used to fetch the next page of items.
41
  return iter.Token()
42
}


17 collapsed lines
1
import cloud.stately.db.ListToken;
2
import cloud.stately.statelydb.Client;
3
import cloud.stately.statelydb.ListOptions;
4
import cloud.stately.statelydb.ListResult;
5
import cloud.stately.statelydb.PutRequest;
6
import cloud.stately.statelydb.ScanOptions;
7
import cloud.stately.statelydb.SyncResult;
8
import cloud.stately.statelydb.TransactionResult;
9
import cloud.stately.statelydb.schema.StatelyItem;
10
import java.util.List;
11
import java.util.UUID;
12
import schema.Actor;
13
import schema.Change;
14
import schema.Character;
15
import schema.Movie;
16

17

18
public static ListToken sampleScan(Client client) throws Exception {
19
  ScanOptions options = ScanOptions.builder().addItemType("Movie").addItemType("Actor").build(); // scan only Movie and Actor items
20
  ListResult scanResult = client.beginScan(options).get();
21

22
  for (StatelyItem item : scanResult.getItems()) {
23
    if (item instanceof Movie) {
24
      Movie movie = (Movie) item;
25
      System.out.println("[Movie] title: " + movie.getTitle());
26
    } else if (item instanceof Actor) {
27
      Actor actor = (Actor) item;
28
      System.out.println("[Actor] name: " + actor.getName());
29
    }
30
  }
31

32
  // Once every item has been handled, return the token from the
33
  // result; it can be used to fetch the next page of items.
34
  return scanResult.getToken();
35
}


5 collapsed lines
1
require 'bundler/setup'
2
require_relative 'schema/stately'
3
require 'byebug'
4

5

6
def sample_scan(client)
7
  begin_scan_result, token = client.begin_scan(item_types: ['Movie', 'Actor']) # returns the items plus a list token
8

9
  begin_scan_result.each do |item|
10
    case item
11
    when StatelyDB::Types::Movie
12
      puts "[Movie] title: #{item.title}"
13
    when StatelyDB::Types::Actor
14
      puts "[Actor] name: #{item.name}"
15
    end
16
  end
17

18
  return token # can be used to fetch the next page of items
19
end


11 collapsed lines
1
from __future__ import annotations
2

3
import asyncio
4
import os
5
from typing import TYPE_CHECKING
6

7
from statelydb import ListToken, SyncChangedItem, SyncDeletedItem, SyncReset, key_path
8

9
from .schema import Actor, Change, Character, Client, Movie
10

11

12
async def sample_scan(client: Client) -> ListToken:
13
    scan_resp = await client.begin_scan(item_types=[Movie, Actor])  # only Movie and Actor items
14

15
    async for item in scan_resp:
16
        if isinstance(item, Movie):
17
            print(f"[Movie] title: {item.title}")
18
        elif isinstance(item, Actor):
19
            print(f"[Actor] name: {item.name}")
20

21
    # When we've exhausted the iterator, we'll get a token that we can
22
    # use to fetch the next page of items.
23
    return scan_resp.token


7 collapsed lines
1
import { createClient, DatabaseClient, Movie } from "./schema/index.js";
2
import {
3
  nodeTransport,
4
  keyPath,
5
  ListToken,
6
} from "@stately-cloud/client/node";
7

8
async function sampleScan(client: DatabaseClient): Promise<ListToken> {
9
  let iter = client.beginScan({
10
    itemTypes: ["Movie", "Actor"], // only scan for Movie and Actor items
11
  });
12
  for await (const item of iter) {
13
    if (client.isType(item, "Movie")) {
14
      console.log("Movie:", item.title);
15
    } else if (client.isType(item, "Actor")) {
16
      console.log("Actor:", item.name);
17
    }
18
  }
19
  return iter.token!; // non-null assertion: the token is expected to be set once the loop has drained the iterator
20
}

1
stately item scan \
2
  --store-id <store-id-goes-here> \
3
  --item-types Movie,Actor  # comma-separated item types to scan for

Using the List Token to Continue

The result from BeginScan includes a list token which you can pass to ContinueScan to continue the scan. Read more about list tokens in Using the List Token to Continue. token.canSync will always be set to false for Scan operations.


17 collapsed lines
1
package main
2

3
import (
4
  "context"
5
  "fmt"
6
  "os"
7
  "slices"
8
  "strconv"
9
  "time"
10

11
  "github.com/google/uuid"
12

13
  "github.com/StatelyCloud/go-sdk/stately"
14
  // This is the code you generated from schema
15
  "github.com/StatelyCloud/stately/go-sdk-sample/schema"
16
)
17

18
func sampleContinueScan(
19
  ctx context.Context,
20
  client stately.Client,
21
  token *stately.ListToken,
22
) (*stately.ListToken, error) {
23
  iter, err := client.ContinueScan(ctx, token.Data) // resume the scan from the given list token
24
  if err != nil {
25
    return nil, err
26
  }
27
  for iter.Next() {
28
    item := iter.Value()
29
    switch v := item.(type) { // same item types that sampleScan requested
30
    case *schema.Movie:
31
      fmt.Printf("Movie Title: %s\n", v.GetTitle())
32
    case *schema.Actor:
33
      fmt.Printf("Actor Name: %s\n", v.GetName())
34
    }
35
  }
36
  // You could save the returned token to call ContinueScan again later.
37
  return iter.Token()
38
}


17 collapsed lines
1
import cloud.stately.db.ListToken;
2
import cloud.stately.statelydb.Client;
3
import cloud.stately.statelydb.ListOptions;
4
import cloud.stately.statelydb.ListResult;
5
import cloud.stately.statelydb.PutRequest;
6
import cloud.stately.statelydb.ScanOptions;
7
import cloud.stately.statelydb.SyncResult;
8
import cloud.stately.statelydb.TransactionResult;
9
import cloud.stately.statelydb.schema.StatelyItem;
10
import java.util.List;
11
import java.util.UUID;
12
import schema.Actor;
13
import schema.Change;
14
import schema.Character;
15
import schema.Movie;
16

17

18
public static ListToken sampleContinueScan(Client client, ListToken token) throws Exception {
19
  // Fetch the next page of items
20
  ListResult continueScanResult = client.continueScan(token).get();
21

22
  // Print the title or name of each item in the next batch
23
  for (StatelyItem item : continueScanResult.getItems()) {
24
    if (item instanceof Movie) {
25
      Movie movie = (Movie) item;
26
      System.out.println("[Movie] title: " + movie.getTitle());
27
    } else if (item instanceof Actor) {
28
      Actor actor = (Actor) item;
29
      System.out.println("[Actor] name: " + actor.getName());
30
    }
31
  }
32

33
  // You could save the token to call continueScan again later.
34
  return continueScanResult.getToken();
35
}


5 collapsed lines
1
require 'bundler/setup'
2
require_relative 'schema/stately'
3
require 'byebug'
4

5

6
def sample_continue_scan(client, token)
7
  # Fetch the next page of items; `token` is rebound to the new list token
8
  continue_scan_result, token = client.continue_scan(token)
9

10
  continue_scan_result.each do |item|
11
    case item
12
    when StatelyDB::Types::Movie
13
      puts "[Movie] title: #{item.title}"
14
    when StatelyDB::Types::Actor
15
      puts "[Actor] name: #{item.name}"
16
    end
17
  end
18

19
  # You could save the token to call continue_scan again later.
20
  return token
21
end


11 collapsed lines
1
from __future__ import annotations
2

3
import asyncio
4
import os
5
from typing import TYPE_CHECKING
6

7
from statelydb import ListToken, SyncChangedItem, SyncDeletedItem, SyncReset, key_path
8

9
from .schema import Actor, Change, Character, Client, Movie
10

11

12
async def sample_continue_scan(client: Client, token: ListToken) -> ListToken:
13
    # Fetch the next page of items
14
    continue_scan_result = await client.continue_scan(token)
15

16
    # Print the title or name of each item in the next batch
17
    async for item in continue_scan_result:
18
        if isinstance(item, Movie):
19
            print(f"[Movie] title: {item.title}")
20
        elif isinstance(item, Actor):
21
            print(f"[Actor] name: {item.name}")
22

23
    # You could save the token to call continue_scan again later.
24
    return continue_scan_result.token


7 collapsed lines
1
import { createClient, DatabaseClient, Movie } from "./schema/index.js";
2
import {
3
  nodeTransport,
4
  keyPath,
5
  ListToken,
6
} from "@stately-cloud/client/node";
7

8
async function sampleContinueScan(
9
  client: DatabaseClient,
10
  token: ListToken,
11
): Promise<ListToken> {
12
  // You can call `collect` on the iterator to pull all the items
13
  // into an Array; it resolves to the items plus the new token.
14
  const { items, token: newToken } = await client
15
    .continueScan(token)
16
    .collect();
17

18
  for (const item of items) {
19
    if (client.isType(item, "Movie")) {
20
      console.log("Movie:", item.title);
21
    } else if (client.isType(item, "Actor")) {
22
      console.log("Actor:", item.name);
23
    }
24
  }
25
  // You could save the token to call continueScan again later.
26
  return newToken;
27
}

Filters

Scan results can also be limited by applying filters to the result set. Please note: filters are applied after the initial result set is fetched. This means that you are still charged for reading items which are filtered out. If scanning over an entire table to fetch only a small number of the items becomes a common pattern in your application, it may be more cost-effective to use a dedicated global index; reach out to learn more! There are two kinds of filters StatelyDB supports:

Item Type Filter

The first is an item type filter, which allows you to specify which item types you want to include in the result set. This is useful when there are multiple item types in the table that could be returned but when you only care about specific item types. If this filter is not specified, all item types found via the scan operation are included in the result set.

For example, the Example: Movies Schema has four unique item types (Movie, Character, Actor, and Change), but we could scan the entire table for all movies by adding a filter on the Movie item type. We can set an itemType filter accordingly. Note: When an item has multiple key paths, it will only be returned once with its primary key path, even if multiple key paths match the item type filter.

To see language-specific examples of how to use item type filters in a Scan operation, see the example tables under CEL Expression Filter.

CEL Expression Filter

The second is a CEL expression filter, which allows you to specify arbitrary conditions that an item of a given type must satisfy to be included in the result set. CEL expression filters use the CEL language spec, providing you with a powerful, flexible way to filter items based on their properties, relationships, and other criteria. For example, you could construct a filter that would include only movies that are rated PG-13, are less than 2 hours in duration, and were released in a year that is a multiple of 3.

CEL expression filters only apply to a single item type at a time and do not affect other item types in a result set. This means that if an item type isn’t mentioned in a CEL expression filter and there are no item type filter constraints, it will be included in the result set.

In the context of a CEL expression, the keyword this refers to the item being evaluated, and properties should be accessed by their names as they appear in the schema — not necessarily as they appear in the generated code for a particular language. For example, if you have a Movie item type with the property rating, you could write a CEL expression like this.rating == 'R' to return only movies that are rated R.

See the following examples for how to use filters in a Scan operation by language:


17 collapsed lines
1
package main
2

3
import (
4
  "context"
5
  "fmt"
6
  "os"
7
  "slices"
8
  "strconv"
9
  "time"
10

11
  "github.com/google/uuid"
12

13
  "github.com/StatelyCloud/go-sdk/stately"
14
  // This is the code you generated from schema
15
  "github.com/StatelyCloud/stately/go-sdk-sample/schema"
16
)
17

18
func sampleScanWithFilters(
19
  ctx context.Context,
20
  client stately.Client,
21
) (*stately.ListToken, error) {
22
  opts := stately.ScanOptions{}.
23
    // Fetch ONLY items of type Movie
24
    WithItemTypesToInclude("Movie").
25
    // This filter will ONLY return movies that...
26
    // 1. Are rated PG-13
27
    // 2. Have a duration of less than 2 hours
28
    // 3. Were released in a year that is a multiple of 3
29
    WithCelExpressionFilter("Movie",
30
      "this.rating == 'PG-13' && this.duration < duration('2h').getSeconds() && this.year % 3 == 0")
31

32
  iter, err := client.BeginScan(ctx, opts)
33
  if err != nil {
34
    return nil, err
35
  }
36

37
  for iter.Next() {
38
    movie := iter.Value().(*schema.Movie) // Unchecked type assertion: we know this is a Movie because of the item type filter!
39
    fmt.Printf("Movie Title: %s\n", movie.GetTitle())
40
  }
41

42
  // Once the iterator is exhausted, Token() hands back the list token
43
  // (or an error) that can be used to fetch the next page of items.
44
  return iter.Token()
45
}


17 collapsed lines
1
import cloud.stately.db.ListToken;
2
import cloud.stately.statelydb.Client;
3
import cloud.stately.statelydb.ListOptions;
4
import cloud.stately.statelydb.ListResult;
5
import cloud.stately.statelydb.PutRequest;
6
import cloud.stately.statelydb.ScanOptions;
7
import cloud.stately.statelydb.SyncResult;
8
import cloud.stately.statelydb.TransactionResult;
9
import cloud.stately.statelydb.schema.StatelyItem;
10
import java.util.List;
11
import java.util.UUID;
12
import schema.Actor;
13
import schema.Change;
14
import schema.Character;
15
import schema.Movie;
16

17

18
public static ListToken sampleScanWithFilters(Client client) throws Exception {
19
  ScanOptions options = ScanOptions.builder().
20
      addItemType("Movie").
21
      addCelFilter("Movie", "this.rating == 'PG-13' && this.duration < duration('2h').getSeconds() && this.year % 3 == 0").
22
      build();
23
  ListResult scanResult = client.beginScan(options).get();
24

25
  for (StatelyItem item : scanResult.getItems()) {
26
      // Note: the cast below is just to illustrate that we know
27
      // the item is a Movie because addItemType("Movie") was the
28
      // only item type set on the options passed to beginScan.
29
      System.out.println("[Movie] title: " + ((Movie) item).getTitle());
30
  }
31

32
  // Once every item has been handled, return the token from the
33
  // result; it can be used to fetch the next page of items.
34
  return scanResult.getToken();
35
}


5 collapsed lines
1
require 'bundler/setup'
2
require_relative 'schema/stately'
3
require 'byebug'
4

5

6
def sample_scan_with_filters(client)
7
  begin_scan_result, token = client.begin_scan(
8
      item_types: ['Movie'],
9
      cel_filters: [
10
          ['Movie', "this.rating == 'PG-13' && this.duration < duration('2h').getSeconds() && this.year % 3 == 0"]
11
      ])
12

13
  begin_scan_result.each do |item|
14
    # Note! we know that the item is a 'Movie' because we specified
15
    # item_types: ['Movie'] in the begin_scan call.
16
    puts "[Movie] title: #{item.title}"
17
  end
18

19
  return token
20
end


11 collapsed lines
1
from __future__ import annotations
2

3
import asyncio
4
import os
5
from typing import TYPE_CHECKING
6

7
from statelydb import ListToken, SyncChangedItem, SyncDeletedItem, SyncReset, key_path
8

9
from .schema import Actor, Change, Character, Client, Movie
10

11

12
async def sample_scan_with_filters(client: Client) -> ListToken:
13
    scan_resp = await client.begin_scan(
14
        item_types=[Movie],
15
        cel_filters=[
16
            [
17
                Movie,
18
                "this.rating == 'PG-13' && this.duration < duration('2h').getSeconds() && this.year % 3 == 0",
19
            ],
20
        ],
21
    )
22

23
    async for item in scan_resp:
24
        # Note! we know that the item is a 'Movie' because we specified
25
        # item_types=[Movie] in the begin_scan call.
26
        print(f"[Movie] title: {item.title}")
27

28
    # When we've exhausted the iterator, we'll get a token that we can
29
    # use to fetch the next page of items.
30
    return scan_resp.token


7 collapsed lines
1
import { createClient, DatabaseClient, Movie } from "./schema/index.js";
2
import {
3
  nodeTransport,
4
  keyPath,
5
  ListToken,
6
} from "@stately-cloud/client/node";
7

8
async function sampleScanWithFilters(
9
  client: DatabaseClient,
10
): Promise<ListToken> {
11
  let iter = client.beginScan({
12
    itemTypes: ["Movie"], // return only Movie items
13
    celFilters: [
14
      [
15
        "Movie", // item type this CEL expression applies to
16
        "this.rating == 'PG-13' && this.duration < duration('2h').getSeconds() && this.year % 3 == 0",
17
      ],
18
    ],
19
  });
20

21
  for await (const item of iter) {
22
    // Note: `item` is guaranteed to be a Movie here
23
    // because of the `itemTypes: ["Movie"]` filter above.
24
    console.log("Movie:", (item as Movie).title);
25
  }
26
  return iter.token!; // non-null assertion: the token is expected to be set once the loop has drained the iterator
27
}

1
#!/usr/bin/env bash
2

3
# begin-sample: update
4
stately item put \
5
  --store-id <store-id-goes-here> \
6
  --item-type 'Movie' \
7
  --item-data '{
8
    "id": "2hC3sMFFSlelJlFf9hRD9g",
9
    "title": "Starship Troopers",
10
    "rating": "R",
11
    "duration": 7740,
12
    "genre": "Sci-Fi",
13
    "year": 1997
14
  }'
15
# end-sample
16

17
# begin-sample: put
18
stately item put \
19
  --store-id <store-id-goes-here> \
20
  --item-type 'Movie' \
21
  --item-data '{
22
    "title": "Starship Troopers 2",
23
    "year": 2004,
24
    "genre": "Sci-Fi",
25
    "duration": 7880,
26
    "rating": "R"
27
  }'
28
# end-sample
29

30
# begin-sample: get
31
# There are no key path helpers for shell, so you need to
32
# manually base64-encode the UUID's bytes
33
stately item get \
34
  --store-id <store-id-goes-here> \
35
  --item-key '/movie-2hC3sMFFSlelJlFf9hRD9g'
36
# end-sample
37

38
# begin-sample: delete
39
# There are no key path helpers for shell, so you need to
40
# manually base64-encode the UUID's bytes
41
stately item delete \
42
  --store-id <store-id-goes-here> \
43
  --item-key '/movie-2hC3sMFFSlelJlFf9hRD9g'
44
# end-sample
45

46
# begin-sample: list
47
# There are no key path helpers for shell, so you need to
48
# manually base64-encode the UUID's bytes
49
stately item list \
50
  --store-id <store-id-goes-here> \
51
  --item-path-prefix '/movie-2hC3sMFFSlelJlFf9hRD9g'
52
# end-sample
53

54
# begin-sample: scan
55
# Scan the whole Store, returning only Movie and Actor items
56
stately item scan \
57
  --store-id <store-id-goes-here> \
58
  --item-types Movie,Actor
59
# end-sample

Limits

Pass a limit to BeginScan to cap the maximum number of items to retrieve. If limit is set to 0, the first page of results will be returned, which may be empty if all of its results were filtered out. Be sure to check token.canContinue to see if there are more results to fetch.

Segmentation

Because a Scan operation can be slow for large data sets, you can segment the operation into smaller chunks by passing the totalSegments and segmentIndex parameters to BeginScan. This will allow you to run multiple Scan operations in parallel, each responsible for a different segment of the Store. You can split your scan into up to 1,000,000 segments. Note: Some backing storage layers may have limits on the number of segments they support.

Listing Across Client Upgrades

Just like for List operations, you are not able to use a list token across client versions.