[gemini] support amp o3 for gemini (#4872)

* [gemini] support no reuse fp16 chunk

* [gemini] support no master weight for optim

* [gemini] support no master weight for gemini ddp

* [test] update gemini tests

* [test] update gemini tests

* [plugin] update gemini plugin

* [test] fix gemini checkpointio test

* [test] fix gemini checkpoint io
This commit is contained in:
Hongxin Liu
2023-10-12 10:39:08 +08:00
committed by GitHub
parent c1fab951e7
commit df63564184
15 changed files with 222 additions and 114 deletions

View File

@@ -245,3 +245,13 @@ class ChunkManager:
chunk.release_chunk()
self.accessed_chunks.remove(chunk)
self.accessed_mem -= chunk.chunk_mem
# Create the gradient chunk for `chunk` by calling `chunk.init_grad_chunk()`,
# update this manager's memory accounting for it, and return the new grad chunk.
# NOTE(review): leading indentation is missing in this view of the hunk, so the
# nesting described in the comments below is inferred from syntax -- confirm
# against the real file.
def init_grad_chunk(self, chunk: Chunk) -> Chunk:
# If `chunk` already has a grad chunk, subtract that chunk's recorded memory
# usage before creating the new one (presumably so the old one is not counted
# twice once it is replaced -- TODO confirm).
if chunk.grad_chunk is not None:
self.__sub_memory_usage(chunk.grad_chunk.memory_usage)
# `grad_chunk` is used by the final `return` whatever the check above found,
# so this call has to sit outside that `if`.
grad_chunk = chunk.init_grad_chunk()
# Add the memory usage reported by the newly created grad chunk.
self.__add_memory_usage(grad_chunk.memory_usage)
# Record the grad chunk in `accessed_chunks` if it is not already there, and
# add its `chunk_mem` to `accessed_mem`.
# NOTE(review): the `accessed_mem` update presumably belongs inside this `if`
# together with the `add` -- verify the indentation in the real file.
if grad_chunk not in self.accessed_chunks:
self.accessed_chunks.add(grad_chunk)
self.accessed_mem += grad_chunk.chunk_mem
return grad_chunk