Skip to content

Commit 697df4a

Browse files
feat: Implement Subscriber, which handles flow control and batch message processing. (#16)
* feat: Implement Subscriber, which handles flow control and batch message processing. Also ensure all asynchronous loopers are torn down when their underlying objects are.
1 parent 0a09bb3 commit 697df4a

7 files changed

Lines changed: 473 additions & 5 deletions

File tree

google/cloud/pubsublite/internal/wire/assigner_impl.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __init__(self, initial: InitialPartitionAssignmentRequest,
3939

4040
async def __aenter__(self):
4141
await self._connection.__aenter__()
42+
return self
4243

4344
def _start_receiver(self):
4445
assert self._receiver is None
@@ -63,10 +64,11 @@ async def _receive_loop(self):
6364
for partition in response.partitions:
6465
partitions.add(Partition(partition))
6566
self._new_assignment.put_nowait(partitions)
66-
except asyncio.CancelledError:
67+
except (asyncio.CancelledError, GoogleAPICallError):
6768
return
6869

6970
async def __aexit__(self, exc_type, exc_val, exc_tb):
71+
await self._stop_receiver()
7072
await self._connection.__aexit__(exc_type, exc_val, exc_tb)
7173

7274
async def reinitialize(self, connection: Connection[PartitionAssignmentRequest, PartitionAssignment]):

google/cloud/pubsublite/internal/wire/committer_impl.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
from google.cloud.pubsublite.internal.wire.connection import Connection
1111
from google.cloud.pubsublite.internal.wire.serial_batcher import SerialBatcher, BatchTester
1212
from google.cloud.pubsublite_v1 import Cursor
13-
from google.cloud.pubsublite_v1.types import StreamingCommitCursorRequest, StreamingCommitCursorResponse, InitialCommitCursorRequest
13+
from google.cloud.pubsublite_v1.types import StreamingCommitCursorRequest, StreamingCommitCursorResponse, \
14+
InitialCommitCursorRequest
1415
from google.cloud.pubsublite.internal.wire.work_item import WorkItem
1516

1617

17-
class CommitterImpl(Committer, ConnectionReinitializer[StreamingCommitCursorRequest, StreamingCommitCursorResponse], BatchTester[Cursor]):
18+
class CommitterImpl(Committer, ConnectionReinitializer[StreamingCommitCursorRequest, StreamingCommitCursorResponse],
19+
BatchTester[Cursor]):
1820
_initial: InitialCommitCursorRequest
1921
_flush_seconds: float
2022
_connection: RetryingConnection[StreamingCommitCursorRequest, StreamingCommitCursorResponse]
@@ -38,6 +40,7 @@ def __init__(self, initial: InitialCommitCursorRequest, flush_seconds: float,
3840

3941
async def __aenter__(self):
4042
await self._connection.__aenter__()
43+
return self
4144

4245
def _start_loopers(self):
4346
assert self._receiver is None
@@ -71,7 +74,7 @@ async def _receive_loop(self):
7174
while True:
7275
response = await self._connection.read()
7376
self._handle_response(response)
74-
except asyncio.CancelledError:
77+
except (asyncio.CancelledError, GoogleAPICallError):
7578
return
7679

7780
async def _flush_loop(self):
@@ -83,6 +86,7 @@ async def _flush_loop(self):
8386
return
8487

8588
async def __aexit__(self, exc_type, exc_val, exc_tb):
89+
await self._stop_loopers()
8690
if self._connection.error():
8791
self._fail_if_retrying_failed()
8892
else:

google/cloud/pubsublite/internal/wire/routing_publisher.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def __init__(self, routing_policy: RoutingPolicy, publishers: Mapping[Partition,
1818
async def __aenter__(self):
1919
for publisher in self._publishers.values():
2020
await publisher.__aenter__()
21+
return self
2122

2223
async def __aexit__(self, exc_type, exc_val, exc_tb):
2324
for publisher in self._publishers.values():

google/cloud/pubsublite/internal/wire/single_partition_publisher.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def _partition(self) -> Partition:
4848

4949
async def __aenter__(self):
5050
await self._connection.__aenter__()
51+
return self
5152

5253
def _start_loopers(self):
5354
assert self._receiver is None
@@ -82,7 +83,7 @@ async def _receive_loop(self):
8283
while True:
8384
response = await self._connection.read()
8485
self._handle_response(response)
85-
except asyncio.CancelledError:
86+
except (asyncio.CancelledError, GoogleAPICallError):
8687
return
8788

8889
async def _flush_loop(self):
@@ -98,6 +99,7 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
9899
self._fail_if_retrying_failed()
99100
else:
100101
await self._flush()
102+
await self._stop_loopers()
101103
await self._connection.__aexit__(exc_type, exc_val, exc_tb)
102104

103105
def _fail_if_retrying_failed(self):
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from abc import abstractmethod
2+
from typing import AsyncContextManager
3+
from google.cloud.pubsublite_v1.types import SequencedMessage, FlowControlRequest
4+
5+
6+
class Subscriber(AsyncContextManager):
    """
    Interface for an asynchronous Pub/Sub Lite wire protocol subscriber.

    Implementations are async context managers; messages are pulled with
    `read` and the server is granted additional capacity with `allow_flow`.
    """

    @abstractmethod
    async def read(self) -> SequencedMessage:
        """
        Wait for and return the next message from the stream.

        Returns:
          The next message.

        Raises:
          GoogleAPICallError: On a permanent error.
        """
        raise NotImplementedError

    @abstractmethod
    async def allow_flow(self, request: FlowControlRequest):
        """
        Grant the server an additional allowance of messages and bytes that it
        may send to this client.

        Args:
          request: The additional allowance to grant.
        """
        raise NotImplementedError
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import asyncio
2+
from typing import Optional
3+
4+
from google.api_core.exceptions import GoogleAPICallError, FailedPrecondition
5+
6+
from google.cloud.pubsublite.internal.wire.connection import Request, Connection, Response, ConnectionFactory
7+
from google.cloud.pubsublite.internal.wire.connection_reinitializer import ConnectionReinitializer
8+
from google.cloud.pubsublite.internal.wire.flow_control_batcher import FlowControlBatcher
9+
from google.cloud.pubsublite.internal.wire.retrying_connection import RetryingConnection
10+
from google.cloud.pubsublite.internal.wire.subscriber import Subscriber
11+
from google.cloud.pubsublite_v1 import SubscribeRequest, SubscribeResponse, FlowControlRequest, SequencedMessage, \
12+
InitialSubscribeRequest, SeekRequest, Cursor
13+
14+
15+
class SubscriberImpl(Subscriber, ConnectionReinitializer[SubscribeRequest, SubscribeResponse]):
    """
    A Subscriber built on a RetryingConnection.

    Two background loopers run while a stream is established: a receiver,
    which validates incoming responses and places their messages on an internal
    queue, and a flusher, which periodically sends batched flow control tokens.
    Both loopers are stopped whenever the connection is reinitialized and when
    the subscriber is exited.
    """
    # Initial request sent as the first message on every new stream.
    _initial: InitialSubscribeRequest
    # Period, in seconds, between attempts to send batched flow control tokens.
    _token_flush_seconds: float
    _connection: RetryingConnection[SubscribeRequest, SubscribeResponse]

    # Batches flow control tokens granted by the client and tracks what is outstanding.
    _outstanding_flow_control: FlowControlBatcher

    # True while `reinitialize` is running; expedited token sends are suppressed meanwhile.
    _reinitializing: bool
    # Offset of the last message accepted from the server, or None if none was received yet.
    _last_received_offset: Optional[int]

    _message_queue: 'asyncio.Queue[SequencedMessage]'

    _receiver: Optional[asyncio.Future]
    _flusher: Optional[asyncio.Future]

    def __init__(self, initial: InitialSubscribeRequest, token_flush_seconds: float,
                 factory: ConnectionFactory[SubscribeRequest, SubscribeResponse]):
        """
        Args:
          initial: The initial request to send on every new stream.
          token_flush_seconds: How often, in seconds, batched flow control tokens are sent.
          factory: Factory handed to the RetryingConnection to create streams.
        """
        self._initial = initial
        self._token_flush_seconds = token_flush_seconds
        self._connection = RetryingConnection(factory, self)
        self._outstanding_flow_control = FlowControlBatcher()
        self._reinitializing = False
        self._last_received_offset = None
        self._message_queue = asyncio.Queue()
        self._receiver = None
        self._flusher = None

    async def __aenter__(self):
        """Enter the underlying connection and return this subscriber."""
        await self._connection.__aenter__()
        return self

    def _start_loopers(self):
        """Schedule the receive and flush loops. Neither may already be running."""
        assert self._receiver is None
        assert self._flusher is None
        self._receiver = asyncio.ensure_future(self._receive_loop())
        self._flusher = asyncio.ensure_future(self._flush_loop())

    @staticmethod
    async def _cancel_and_wait(looper: asyncio.Future):
        """Cancel a looper and wait for it to finish, ignoring the resulting cancellation."""
        looper.cancel()
        try:
            await looper
        except asyncio.CancelledError:
            # A looper cancelled before its first step never reaches its own
            # `except CancelledError` handler, so the cancellation surfaces here.
            # It must not abort teardown of the remaining looper or the connection.
            pass

    async def _stop_loopers(self):
        """Cancel whichever loopers are running, wait for them to exit and clear their handles."""
        if self._receiver is not None:
            await self._cancel_and_wait(self._receiver)
            self._receiver = None
        if self._flusher is not None:
            await self._cancel_and_wait(self._flusher)
            self._flusher = None

    def _handle_response(self, response: SubscribeResponse):
        """
        Validate a response received after initialization and enqueue its messages.

        The connection is failed with FailedPrecondition, and nothing is enqueued,
        when the response carries no messages or when any message's offset is not
        strictly greater than the previously received offset.
        """
        if "messages" not in response:
            self._connection.fail(FailedPrecondition("Received an invalid subsequent response on the subscribe stream."))
            return
        # Flow control accounting happens before the ordering validation below.
        self._outstanding_flow_control.on_messages(response.messages.messages)
        for message in response.messages.messages:
            if self._last_received_offset is not None and message.cursor.offset <= self._last_received_offset:
                self._connection.fail(FailedPrecondition(
                    "Received an invalid out of order message from the server. Message is {}, previous last received is {}.".format(
                        message.cursor.offset, self._last_received_offset)))
                return
            self._last_received_offset = message.cursor.offset
        # Enqueue only after the whole batch has been validated.
        for message in response.messages.messages:
            # queue is unbounded.
            self._message_queue.put_nowait(message)

    async def _receive_loop(self):
        """Read and handle responses until cancelled or until the connection read raises GoogleAPICallError."""
        try:
            while True:
                response = await self._connection.read()
                self._handle_response(response)
        except (asyncio.CancelledError, GoogleAPICallError):
            return

    async def _try_send_tokens(self):
        """Send the pending flow control request, if there is one, ignoring GoogleAPICallError."""
        req = self._outstanding_flow_control.release_pending_request()
        if req is None:
            return
        try:
            await self._connection.write(SubscribeRequest(flow_control=req))
        except GoogleAPICallError:
            # May be transient, in which case these tokens will be resent.
            pass

    async def _flush_loop(self):
        """Every `_token_flush_seconds`, try to send batched tokens, until cancelled."""
        try:
            while True:
                await asyncio.sleep(self._token_flush_seconds)
                await self._try_send_tokens()
        except asyncio.CancelledError:
            return

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop the loopers, then exit the underlying connection."""
        await self._stop_loopers()
        await self._connection.__aexit__(exc_type, exc_val, exc_tb)

    async def reinitialize(self, connection: Connection[SubscribeRequest, SubscribeResponse]):
        """
        Perform the stream handshake on a new connection and restart the loopers.

        Sends the initial request, then, if any message was received previously,
        seeks to the offset after the last received one, then sends the flow
        control request produced by the batcher for a restart, if any. If the
        initial or seek response is of the wrong kind, the connection is failed
        with FailedPrecondition and the loopers are not restarted.

        Args:
          connection: The new connection to initialize.
        """
        self._reinitializing = True
        await self._stop_loopers()
        await connection.write(SubscribeRequest(initial=self._initial))
        response = await connection.read()
        if "initial" not in response:
            self._connection.fail(FailedPrecondition("Received an invalid initial response on the subscribe stream."))
            return
        if self._last_received_offset is not None:
            # Perform a seek to get the next message after the one we received.
            await connection.write(SubscribeRequest(seek=SeekRequest(cursor=Cursor(offset=self._last_received_offset + 1))))
            seek_response = await connection.read()
            if "seek" not in seek_response:
                self._connection.fail(FailedPrecondition("Received an invalid seek response on the subscribe stream."))
                return
        tokens = self._outstanding_flow_control.request_for_restart()
        if tokens is not None:
            await connection.write(SubscribeRequest(flow_control=tokens))
        self._reinitializing = False
        self._start_loopers()

    async def read(self) -> SequencedMessage:
        """
        Return the next queued message.

        The wait goes through the connection's `await_unless_failed`, which
        presumably raises the connection's failure instead of blocking forever
        (confirm against RetryingConnection).
        """
        return await self._connection.await_unless_failed(self._message_queue.get())

    async def allow_flow(self, request: FlowControlRequest):
        """
        Record additional flow control tokens, sending them immediately when the
        batcher asks for expedited delivery and no reinitialization is in progress.

        Args:
          request: The additional allowance to grant to the server.
        """
        self._outstanding_flow_control.add(request)
        if not self._reinitializing and self._outstanding_flow_control.should_expedite():
            await self._try_send_tokens()

0 commit comments

Comments
 (0)